Diffstat (limited to 'compiler/utils')
75 files changed, 19145 insertions, 8591 deletions
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index e5f91dc8ca..d5cd59d481 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -376,500 +376,6 @@ void ArmAssembler::Pad(uint32_t bytes) {
   }
 }
 
-static dwarf::Reg DWARFReg(Register reg) {
-  return dwarf::Reg::ArmCore(static_cast<int>(reg));
-}
-
-static dwarf::Reg DWARFReg(SRegister reg) {
-  return dwarf::Reg::ArmFp(static_cast<int>(reg));
-}
-
-constexpr size_t kFramePointerSize = kArmPointerSize;
-
-void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& callee_save_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
-  CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
-
-  // Push callee saves and link register.
-  RegList core_spill_mask = 1 << LR;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-  PushList(core_spill_mask);
-  cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
-  cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
-  if (fp_spill_mask != 0) {
-    vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
-    cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
-    cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
-  }
-
-  // Increase frame to required size.
-  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
-  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
-
-  // Write out Method*.
-  StoreToOffset(kStoreWord, R0, SP, 0);
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (size_t i = 0; i < entry_spills.size(); ++i) {
-    ArmManagedRegister reg = entry_spills.at(i).AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      ManagedRegisterSpill spill = entry_spills.at(i);
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    }
-  }
-}
-
-void ArmAssembler::RemoveFrame(size_t frame_size,
-                               const std::vector<ManagedRegister>& callee_save_regs) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  cfi_.RememberState();
-
-  // Compute callee saves to pop and PC.
-  RegList core_spill_mask = 1 << PC;
-  uint32_t fp_spill_mask = 0;
-  for (const ManagedRegister& reg : callee_save_regs) {
-    if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
-    } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
-    }
-  }
-
-  // Decrease frame to start of callee saves.
-  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pop_values * kFramePointerSize);
-  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
- - if (fp_spill_mask != 0) { - vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); - cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); - cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); - } - - // Pop callee saves and PC. - PopList(core_spill_mask); - - // The CFI should be restored for any code that follows the exit block. - cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - -void ArmAssembler::IncreaseFrameSize(size_t adjust) { - AddConstant(SP, -adjust); - cfi_.AdjustCFAOffset(adjust); -} - -void ArmAssembler::DecreaseFrameSize(size_t adjust) { - AddConstant(SP, adjust); - cfi_.AdjustCFAOffset(-adjust); -} - -void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { - ArmManagedRegister src = msrc.AsArm(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsCoreRegister()) { - CHECK_EQ(4u, size); - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); - } else if (src.IsRegisterPair()) { - CHECK_EQ(8u, size); - StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value()); - StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), - SP, dest.Int32Value() + 4); - } else if (src.IsSRegister()) { - StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value()); - } else { - CHECK(src.IsDRegister()) << src; - StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value()); - } -} - -void ArmAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc, - FrameOffset in_off, ManagedRegister mscratch) { - ArmManagedRegister src = msrc.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); -} - -void ArmAssembler::CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), offs.Int32Value()); - if (unpoison_reference) { - MaybeUnpoisonHeapReference(dst.AsCoreRegister()); - } -} - -void ArmAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value()); -} - -void ArmAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { - ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), 
offs.Int32Value()); -} - -void ArmAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadImmediate(scratch.AsCoreRegister(), imm); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); -} - -void ArmAssembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadImmediate(scratch.AsCoreRegister(), imm); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, dest.Int32Value()); -} - -static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst, - Register src_register, int32_t src_offset, size_t size) { - ArmManagedRegister dst = m_dst.AsArm(); - if (dst.IsNoRegister()) { - CHECK_EQ(0u, size) << dst; - } else if (dst.IsCoreRegister()) { - CHECK_EQ(4u, size) << dst; - assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset); - } else if (dst.IsRegisterPair()) { - CHECK_EQ(8u, size) << dst; - assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset); - assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4); - } else if (dst.IsSRegister()) { - assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset); - } else { - CHECK(dst.IsDRegister()) << dst; - assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset); - } -} - -void ArmAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { - return EmitLoad(this, m_dst, SP, src.Int32Value(), size); -} - -void ArmAssembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset<4> src, size_t size) { - return EmitLoad(this, m_dst, TR, src.Int32Value(), size); -} - -void ArmAssembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset<4> offs) { - ArmManagedRegister dst = m_dst.AsArm(); - CHECK(dst.IsCoreRegister()) << dst; - LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value()); -} - -void ArmAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs, - ThreadOffset<4> thr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - SP, fr_offs.Int32Value()); -} - -void ArmAssembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - SP, fr_offs.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); -} - -void ArmAssembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), - TR, thr_offs.Int32Value()); -} - -void ArmAssembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) { - StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value()); -} - -void ArmAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for 
arm"; -} - -void ArmAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; -} - -void ArmAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) { - ArmManagedRegister dst = m_dst.AsArm(); - ArmManagedRegister src = m_src.AsArm(); - if (!dst.Equals(src)) { - if (dst.IsCoreRegister()) { - CHECK(src.IsCoreRegister()) << src; - mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister())); - } else if (dst.IsDRegister()) { - CHECK(src.IsDRegister()) << src; - vmovd(dst.AsDRegister(), src.AsDRegister()); - } else if (dst.IsSRegister()) { - CHECK(src.IsSRegister()) << src; - vmovs(dst.AsSRegister(), src.AsSRegister()); - } else { - CHECK(dst.IsRegisterPair()) << dst; - CHECK(src.IsRegisterPair()) << src; - // Ensure that the first move doesn't clobber the input of the second. - if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { - mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); - mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); - } else { - mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); - mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); - } - } - } -} - -void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); - } -} - -void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsArm().AsCoreRegister(); - CHECK_EQ(size, 4u); - LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value()); - StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value()); -} - -void ArmAssembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsArm().AsCoreRegister(); - CHECK_EQ(size, 4u); - LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value()); - StoreToOffset(kStoreWord, scratch, dest_base.AsArm().AsCoreRegister(), dest_offset.Int32Value()); -} - -void ArmAssembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister mscratch, size_t size) { - CHECK_EQ(size, 4u); - Register scratch = mscratch.AsArm().AsCoreRegister(); - LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value()); - StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value()); -} - -void ArmAssembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - 
UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, - FrameOffset handle_scope_offset, - ManagedRegister min_reg, bool null_allowed) { - ArmManagedRegister out_reg = mout_reg.AsArm(); - ArmManagedRegister in_reg = min_reg.AsArm(); - CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; - CHECK(out_reg.IsCoreRegister()) << out_reg; - if (null_allowed) { - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) - if (in_reg.IsNoRegister()) { - LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), - SP, handle_scope_offset.Int32Value()); - in_reg = out_reg; - } - cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); - if (!out_reg.Equals(in_reg)) { - it(EQ, kItElse); - LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); - } else { - it(NE); - } - AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); - } else { - AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); - } -} - -void ArmAssembler::CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handle_scope_offset, - ManagedRegister mscratch, - bool null_allowed) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - if (null_allowed) { - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, - handle_scope_offset.Int32Value()); - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - cmp(scratch.AsCoreRegister(), ShifterOperand(0)); - it(NE); - AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); - } else { - AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); - } - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value()); -} - -void ArmAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, - ManagedRegister min_reg) { - ArmManagedRegister out_reg = mout_reg.AsArm(); - ArmManagedRegister in_reg = min_reg.AsArm(); - CHECK(out_reg.IsCoreRegister()) << out_reg; - CHECK(in_reg.IsCoreRegister()) << in_reg; - Label null_arg; - if (!out_reg.Equals(in_reg)) { - LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); // TODO: why EQ? - } - cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); - it(NE); - LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), - in_reg.AsCoreRegister(), 0, NE); -} - -void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void ArmAssembler::Call(ManagedRegister mbase, Offset offset, - ManagedRegister mscratch) { - ArmManagedRegister base = mbase.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(base.IsCoreRegister()) << base; - CHECK(scratch.IsCoreRegister()) << scratch; - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - base.AsCoreRegister(), offset.Int32Value()); - blx(scratch.AsCoreRegister()); - // TODO: place reference map on call. 
-} - -void ArmAssembler::Call(FrameOffset base, Offset offset, - ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; - // Call *(*(SP + base) + offset) - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - SP, base.Int32Value()); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - scratch.AsCoreRegister(), offset.Int32Value()); - blx(scratch.AsCoreRegister()); - // TODO: place reference map on call -} - -void ArmAssembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*scratch*/) { - UNIMPLEMENTED(FATAL); -} - -void ArmAssembler::GetCurrentThread(ManagedRegister tr) { - mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR)); -} - -void ArmAssembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*scratch*/) { - StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL); -} - -void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { - ArmManagedRegister scratch = mscratch.AsArm(); - ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - TR, Thread::ExceptionOffset<4>().Int32Value()); - cmp(scratch.AsCoreRegister(), ShifterOperand(0)); - b(slow->Entry(), NE); -} - -void ArmExceptionSlowPath::Emit(Assembler* sasm) { - ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); - } - // Pass exception object as argument. - // Don't care about preserving R0 as this call won't return. - __ mov(R0, ShifterOperand(scratch_.AsCoreRegister())); - // Set up call to Thread::Current()->pDeliverException. - __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value()); - __ blx(R12); -#undef __ -} - - static int LeadingZeros(uint32_t val) { uint32_t alt; int32_t n; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index ffbe786bf4..0ed8a35338 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -23,18 +23,20 @@ #include "base/arena_allocator.h" #include "base/arena_containers.h" #include "base/bit_utils.h" +#include "base/enums.h" #include "base/logging.h" #include "base/stl_util.h" #include "base/value_object.h" #include "constants_arm.h" +#include "utils/arm/assembler_arm_shared.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" #include "offsets.h" namespace art { namespace arm { -class Arm32Assembler; class Thumb2Assembler; // Assembler literal is a value embedded in code, retrieved using a PC-relative load. @@ -206,7 +208,6 @@ class ShifterOperand { uint32_t rotate_; uint32_t immed_; - friend class Arm32Assembler; friend class Thumb2Assembler; #ifdef SOURCE_ASSEMBLER_SUPPORT @@ -214,29 +215,6 @@ class ShifterOperand { #endif }; - -enum LoadOperandType { - kLoadSignedByte, - kLoadUnsignedByte, - kLoadSignedHalfword, - kLoadUnsignedHalfword, - kLoadWord, - kLoadWordPair, - kLoadSWord, - kLoadDWord -}; - - -enum StoreOperandType { - kStoreByte, - kStoreHalfword, - kStoreWord, - kStoreWordPair, - kStoreSWord, - kStoreDWord -}; - - // Load/store multiple addressing mode. 
enum BlockAddressMode { // bit encoding P U W @@ -268,7 +246,7 @@ class Address : public ValueObject { NegPostIndex = (0|0|0) << 21 // negative post-indexed with writeback }; - Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), rm_(R0), + explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), rm_(R0), offset_(offset), am_(am), is_immed_offset_(true), shift_(LSL) { } @@ -284,12 +262,6 @@ class Address : public ValueObject { CHECK_NE(rm, PC); } - // LDR(literal) - pc relative load. - explicit Address(int32_t offset) : - rn_(PC), rm_(R0), offset_(offset), - am_(Offset), is_immed_offset_(false), shift_(LSL) { - } - static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset); static bool CanHoldStoreOffsetArm(StoreOperandType type, int offset); @@ -417,13 +389,6 @@ enum ItState { kItE = kItElse }; -// Set condition codes request. -enum SetCc { - kCcDontCare, // Allows prioritizing 16-bit instructions on Thumb2 whether they set CCs or not. - kCcSet, - kCcKeep, -}; - constexpr uint32_t kNoItCondition = 3; constexpr uint32_t kInvalidModifiedImmediate = -1; @@ -671,10 +636,15 @@ class ArmAssembler : public Assembler { virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0; virtual void vmstat(Condition cond = AL) = 0; // VMRS APSR_nzcv, FPSCR + virtual void vcntd(DRegister dd, DRegister dm) = 0; + virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0; + virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0; + virtual void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0; + virtual void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0; // Branch instructions. virtual void b(Label* label, Condition cond = AL) = 0; @@ -751,32 +721,7 @@ class ArmAssembler : public Assembler { } } - void LoadDImmediate(DRegister sd, double value, Condition cond = AL) { - if (!vmovd(sd, value, cond)) { - uint64_t int_value = bit_cast<uint64_t, double>(value); - if (int_value == bit_cast<uint64_t, double>(0.0)) { - // 0.0 is quite common, so we special case it by loading - // 2.0 in `sd` and then substracting it. 
- bool success = vmovd(sd, 2.0, cond); - CHECK(success); - vsubd(sd, sd, sd, cond); - } else { - if (sd < 16) { - SRegister low = static_cast<SRegister>(sd << 1); - SRegister high = static_cast<SRegister>(low + 1); - LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond); - if (High32Bits(int_value) == Low32Bits(int_value)) { - vmovs(high, low); - } else { - LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond); - } - } else { - LOG(FATAL) << "Unimplemented loading of double into a D register " - << "that cannot be split into two S registers"; - } - } - } - } + virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0; virtual void MarkExceptionHandler(Label* label) = 0; virtual void LoadFromOffset(LoadOperandType type, @@ -812,6 +757,9 @@ class ArmAssembler : public Assembler { virtual void PushList(RegList regs, Condition cond = AL) = 0; virtual void PopList(RegList regs, Condition cond = AL) = 0; + virtual void StoreList(RegList regs, size_t stack_offset) = 0; + virtual void LoadList(RegList regs, size_t stack_offset) = 0; + virtual void Mov(Register rd, Register rm, Condition cond = AL) = 0; // Convenience shift instructions. Use mov instruction with shifter operand @@ -902,121 +850,6 @@ class ArmAssembler : public Assembler { virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; - // - // Overridden common assembler high-level functionality - // - - // Emit code that will create an activation on the stack - void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack - void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - - void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch) - OVERRIDE; - - void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - - void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - - void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE; - - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - - void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE; - - // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; - - void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs, - ManagedRegister scratch) OVERRIDE; - - void 
CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current() - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE; - - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - static uint32_t ModifiedImmediate(uint32_t value); static bool IsLowRegister(Register r) { @@ -1041,6 +874,12 @@ class ArmAssembler : public Assembler { // reg = -reg. rsb(reg, reg, ShifterOperand(0)); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
void MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { @@ -1094,18 +933,6 @@ class ArmAssembler : public Assembler { ArenaVector<Label*> tracked_labels_; }; -// Slowpath entered when Thread::Current()->_exception is non-null -class ArmExceptionSlowPath FINAL : public SlowPath { - public: - ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) { - } - void Emit(Assembler *sp_asm) OVERRIDE; - private: - const ArmManagedRegister scratch_; - const size_t stack_adjust_; -}; - } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc deleted file mode 100644 index 0a227b21cd..0000000000 --- a/compiler/utils/arm/assembler_arm32.cc +++ /dev/null @@ -1,1666 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "assembler_arm32.h" - -#include "base/bit_utils.h" -#include "base/logging.h" -#include "entrypoints/quick/quick_entrypoints.h" -#include "offsets.h" -#include "thread.h" - -namespace art { -namespace arm { - -bool Arm32Assembler::ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op) { - // Avoid the more expensive test for frequent small immediate values. - if (immediate < (1 << kImmed8Bits)) { - shifter_op->type_ = ShifterOperand::kImmediate; - shifter_op->is_rotate_ = true; - shifter_op->rotate_ = 0; - shifter_op->immed_ = immediate; - return true; - } - // Note that immediate must be unsigned for the test to work correctly. 
- for (int rot = 0; rot < 16; rot++) { - uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot)); - if (imm8 < (1 << kImmed8Bits)) { - shifter_op->type_ = ShifterOperand::kImmediate; - shifter_op->is_rotate_ = true; - shifter_op->rotate_ = rot; - shifter_op->immed_ = imm8; - return true; - } - } - return false; -} - -bool Arm32Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) { - ShifterOperand shifter_op; - return ShifterOperandCanHoldArm32(immediate, &shifter_op); -} - -bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, - Register rn ATTRIBUTE_UNUSED, - Opcode opcode ATTRIBUTE_UNUSED, - uint32_t immediate, - SetCc set_cc ATTRIBUTE_UNUSED, - ShifterOperand* shifter_op) { - return ShifterOperandCanHoldArm32(immediate, shifter_op); -} - -void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), AND, set_cc, rn, rd, so); -} - - -void Arm32Assembler::eor(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), EOR, set_cc, rn, rd, so); -} - - -void Arm32Assembler::sub(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), SUB, set_cc, rn, rd, so); -} - -void Arm32Assembler::rsb(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), RSB, set_cc, rn, rd, so); -} - -void Arm32Assembler::add(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), ADD, set_cc, rn, rd, so); -} - - -void Arm32Assembler::adc(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), ADC, set_cc, rn, rd, so); -} - - -void Arm32Assembler::sbc(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), SBC, set_cc, rn, rd, so); -} - - -void Arm32Assembler::rsc(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), RSC, set_cc, rn, rd, so); -} - - -void Arm32Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) { - CHECK_NE(rn, PC); // Reserve tst pc instruction for exception handler marker. - EmitType01(cond, so.type(), TST, kCcSet, rn, R0, so); -} - - -void Arm32Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) { - CHECK_NE(rn, PC); // Reserve teq pc instruction for exception handler marker. 
- EmitType01(cond, so.type(), TEQ, kCcSet, rn, R0, so); -} - - -void Arm32Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) { - EmitType01(cond, so.type(), CMP, kCcSet, rn, R0, so); -} - - -void Arm32Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) { - EmitType01(cond, so.type(), CMN, kCcSet, rn, R0, so); -} - - -void Arm32Assembler::orr(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), ORR, set_cc, rn, rd, so); -} - - -void Arm32Assembler::orn(Register rd ATTRIBUTE_UNUSED, - Register rn ATTRIBUTE_UNUSED, - const ShifterOperand& so ATTRIBUTE_UNUSED, - Condition cond ATTRIBUTE_UNUSED, - SetCc set_cc ATTRIBUTE_UNUSED) { - LOG(FATAL) << "orn is not supported on ARM32"; -} - - -void Arm32Assembler::mov(Register rd, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), MOV, set_cc, R0, rd, so); -} - - -void Arm32Assembler::bic(Register rd, Register rn, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), BIC, set_cc, rn, rd, so); -} - - -void Arm32Assembler::mvn(Register rd, const ShifterOperand& so, - Condition cond, SetCc set_cc) { - EmitType01(cond, so.type(), MVN, set_cc, R0, rd, so); -} - - -void Arm32Assembler::mul(Register rd, Register rn, Register rm, Condition cond) { - // Assembler registers rd, rn, rm are encoded as rn, rm, rs. - EmitMulOp(cond, 0, R0, rd, rn, rm); -} - - -void Arm32Assembler::mla(Register rd, Register rn, Register rm, Register ra, - Condition cond) { - // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd. - EmitMulOp(cond, B21, ra, rd, rn, rm); -} - - -void Arm32Assembler::mls(Register rd, Register rn, Register rm, Register ra, - Condition cond) { - // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd. - EmitMulOp(cond, B22 | B21, ra, rd, rn, rm); -} - - -void Arm32Assembler::smull(Register rd_lo, Register rd_hi, Register rn, - Register rm, Condition cond) { - // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs. - EmitMulOp(cond, B23 | B22, rd_lo, rd_hi, rn, rm); -} - - -void Arm32Assembler::umull(Register rd_lo, Register rd_hi, Register rn, - Register rm, Condition cond) { - // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs. 
- EmitMulOp(cond, B23, rd_lo, rd_hi, rn, rm); -} - - -void Arm32Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = B26 | B25 | B24 | B20 | - B15 | B14 | B13 | B12 | - (static_cast<int32_t>(cond) << kConditionShift) | - (static_cast<int32_t>(rn) << 0) | - (static_cast<int32_t>(rd) << 16) | - (static_cast<int32_t>(rm) << 8) | - B4; - Emit(encoding); -} - - -void Arm32Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = B26 | B25 | B24 | B21 | B20 | - B15 | B14 | B13 | B12 | - (static_cast<int32_t>(cond) << kConditionShift) | - (static_cast<int32_t>(rn) << 0) | - (static_cast<int32_t>(rd) << 16) | - (static_cast<int32_t>(rm) << 8) | - B4; - Emit(encoding); -} - - -void Arm32Assembler::sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(cond, kNoCondition); - CHECK_LE(lsb, 31U); - CHECK(1U <= width && width <= 32U) << width; - uint32_t widthminus1 = width - 1; - - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | B25 | B24 | B23 | B21 | - (widthminus1 << 16) | - (static_cast<uint32_t>(rd) << 12) | - (lsb << 7) | - B6 | B4 | - static_cast<uint32_t>(rn); - Emit(encoding); -} - - -void Arm32Assembler::ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(cond, kNoCondition); - CHECK_LE(lsb, 31U); - CHECK(1U <= width && width <= 32U) << width; - uint32_t widthminus1 = width - 1; - - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | B25 | B24 | B23 | B22 | B21 | - (widthminus1 << 16) | - (static_cast<uint32_t>(rd) << 12) | - (lsb << 7) | - B6 | B4 | - static_cast<uint32_t>(rn); - Emit(encoding); -} - - -void Arm32Assembler::ldr(Register rd, const Address& ad, Condition cond) { - EmitMemOp(cond, true, false, rd, ad); -} - - -void Arm32Assembler::str(Register rd, const Address& ad, Condition cond) { - EmitMemOp(cond, false, false, rd, ad); -} - - -void Arm32Assembler::ldrb(Register rd, const Address& ad, Condition cond) { - EmitMemOp(cond, true, true, rd, ad); -} - - -void Arm32Assembler::strb(Register rd, const Address& ad, Condition cond) { - EmitMemOp(cond, false, true, rd, ad); -} - - -void Arm32Assembler::ldrh(Register rd, const Address& ad, Condition cond) { - EmitMemOpAddressMode3(cond, L | B7 | H | B4, rd, ad); -} - - -void Arm32Assembler::strh(Register rd, const Address& ad, Condition cond) { - EmitMemOpAddressMode3(cond, B7 | H | B4, rd, ad); -} - - -void Arm32Assembler::ldrsb(Register rd, const Address& ad, Condition cond) { - EmitMemOpAddressMode3(cond, L | B7 | B6 | B4, rd, ad); -} - - -void Arm32Assembler::ldrsh(Register rd, const Address& ad, Condition cond) { - EmitMemOpAddressMode3(cond, L | B7 | B6 | H | B4, rd, ad); -} - - -void Arm32Assembler::ldrd(Register rd, const Address& ad, Condition cond) { - CHECK_EQ(rd % 2, 0); - EmitMemOpAddressMode3(cond, B7 | B6 | B4, rd, ad); -} - - -void Arm32Assembler::strd(Register rd, const Address& ad, Condition cond) { - CHECK_EQ(rd % 2, 0); - EmitMemOpAddressMode3(cond, B7 | B6 | B5 | B4, rd, ad); -} - - -void Arm32Assembler::ldm(BlockAddressMode am, - Register base, - RegList regs, - 
Condition cond) { - EmitMultiMemOp(cond, am, true, base, regs); -} - - -void Arm32Assembler::stm(BlockAddressMode am, - Register base, - RegList regs, - Condition cond) { - EmitMultiMemOp(cond, am, false, base, regs); -} - - -void Arm32Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vmovd(DRegister dd, DRegister dm, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm); -} - - -bool Arm32Assembler::vmovs(SRegister sd, float s_imm, Condition cond) { - uint32_t imm32 = bit_cast<uint32_t, float>(s_imm); - if (((imm32 & ((1 << 19) - 1)) == 0) && - ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) || - (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) { - uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) | - ((imm32 >> 19) & ((1 << 6) -1)); - EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf), - sd, S0, S0); - return true; - } - return false; -} - - -bool Arm32Assembler::vmovd(DRegister dd, double d_imm, Condition cond) { - uint64_t imm64 = bit_cast<uint64_t, double>(d_imm); - if (((imm64 & ((1LL << 48) - 1)) == 0) && - ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) || - (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) { - uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) | - ((imm64 >> 48) & ((1 << 6) -1)); - EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf), - dd, D0, D0); - return true; - } - return false; -} - - -void Arm32Assembler::vadds(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, B21 | B20, sd, sn, sm); -} - - -void Arm32Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, B21 | B20, dd, dn, dm); -} - - -void Arm32Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm); -} - - -void Arm32Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm); -} - - -void Arm32Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, B21, sd, sn, sm); -} - - -void Arm32Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, B21, dd, dn, dm); -} - - -void Arm32Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, 0, sd, sn, sm); -} - - -void Arm32Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, 0, dd, dn, dm); -} - - -void Arm32Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, B6, sd, sn, sm); -} - - -void Arm32Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, B6, dd, dn, dm); -} - - -void Arm32Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm, - Condition cond) { - EmitVFPsss(cond, B23, sd, sn, sm); -} - - -void Arm32Assembler::vdivd(DRegister dd, DRegister dn, DRegister dm, - Condition cond) { - EmitVFPddd(cond, B23, dd, dn, dm); -} - - -void Arm32Assembler::vabss(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vabsd(DRegister dd, DRegister dm, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm); -} - - -void Arm32Assembler::vnegs(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | 
B16 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vnegd(DRegister dd, DRegister dm, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm); -} - - -void Arm32Assembler::vsqrts(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm); -} - -void Arm32Assembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm); -} - - -void Arm32Assembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) { - EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm); -} - - -void Arm32Assembler::vcvtds(DRegister dd, SRegister sm, Condition cond) { - EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm); -} - - -void Arm32Assembler::vcvtis(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vcvtid(SRegister sd, DRegister dm, Condition cond) { - EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm); -} - - -void Arm32Assembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) { - EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm); -} - - -void Arm32Assembler::vcvtus(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vcvtud(SRegister sd, DRegister dm, Condition cond) { - EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm); -} - - -void Arm32Assembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) { - EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm); -} - - -void Arm32Assembler::vcmps(SRegister sd, SRegister sm, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm); -} - - -void Arm32Assembler::vcmpd(DRegister dd, DRegister dm, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm); -} - - -void Arm32Assembler::vcmpsz(SRegister sd, Condition cond) { - EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0); -} - - -void Arm32Assembler::vcmpdz(DRegister dd, Condition cond) { - EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0); -} - -void Arm32Assembler::b(Label* label, Condition cond) { - EmitBranch(cond, label, false); -} - - -void Arm32Assembler::bl(Label* label, Condition cond) { - EmitBranch(cond, label, true); -} - - -void Arm32Assembler::MarkExceptionHandler(Label* label) { - EmitType01(AL, 1, TST, kCcSet, PC, R0, ShifterOperand(0)); - Label l; - b(&l); - EmitBranch(AL, label, false); - Bind(&l); -} - - -void Arm32Assembler::Emit(int32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<int32_t>(value); -} - - -void Arm32Assembler::EmitType01(Condition cond, - int type, - Opcode opcode, - SetCc set_cc, - Register rn, - Register rd, - const ShifterOperand& so) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - type << kTypeShift | - static_cast<int32_t>(opcode) << kOpcodeShift | - (set_cc == kCcSet ? 
1 : 0) << kSShift | - static_cast<int32_t>(rn) << kRnShift | - static_cast<int32_t>(rd) << kRdShift | - so.encodingArm(); - Emit(encoding); -} - - -void Arm32Assembler::EmitType5(Condition cond, int offset, bool link) { - CHECK_NE(cond, kNoCondition); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - 5 << kTypeShift | - (link ? 1 : 0) << kLinkShift; - Emit(Arm32Assembler::EncodeBranchOffset(offset, encoding)); -} - - -void Arm32Assembler::EmitMemOp(Condition cond, - bool load, - bool byte, - Register rd, - const Address& ad) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(cond, kNoCondition); - const Address& addr = static_cast<const Address&>(ad); - - int32_t encoding = 0; - if (!ad.IsImmediate() && ad.GetRegisterOffset() == PC) { - // PC relative LDR(literal) - int32_t offset = ad.GetOffset(); - int32_t u = B23; - if (offset < 0) { - offset = -offset; - u = 0; - } - CHECK_LT(offset, (1 << 12)); - encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | B24 | u | B20 | - (load ? L : 0) | - (byte ? B : 0) | - (static_cast<int32_t>(rd) << kRdShift) | - 0xf << 16 | - (offset & 0xfff); - - } else { - encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | - (load ? L : 0) | - (byte ? B : 0) | - (static_cast<int32_t>(rd) << kRdShift) | - addr.encodingArm(); - } - Emit(encoding); -} - - -void Arm32Assembler::EmitMemOpAddressMode3(Condition cond, - int32_t mode, - Register rd, - const Address& ad) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(cond, kNoCondition); - const Address& addr = static_cast<const Address&>(ad); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B22 | - mode | - (static_cast<int32_t>(rd) << kRdShift) | - addr.encoding3(); - Emit(encoding); -} - - -void Arm32Assembler::EmitMultiMemOp(Condition cond, - BlockAddressMode am, - bool load, - Register base, - RegList regs) { - CHECK_NE(base, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | - am | - (load ? L : 0) | - (static_cast<int32_t>(base) << kRnShift) | - regs; - Emit(encoding); -} - - -void Arm32Assembler::EmitShiftImmediate(Condition cond, - Shift opcode, - Register rd, - Register rm, - const ShifterOperand& so) { - CHECK_NE(cond, kNoCondition); - CHECK(so.IsImmediate()); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - static_cast<int32_t>(MOV) << kOpcodeShift | - static_cast<int32_t>(rd) << kRdShift | - so.encodingArm() << kShiftImmShift | - static_cast<int32_t>(opcode) << kShiftShift | - static_cast<int32_t>(rm); - Emit(encoding); -} - - -void Arm32Assembler::EmitShiftRegister(Condition cond, - Shift opcode, - Register rd, - Register rm, - const ShifterOperand& so) { - CHECK_NE(cond, kNoCondition); - CHECK(so.IsRegister()); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - static_cast<int32_t>(MOV) << kOpcodeShift | - static_cast<int32_t>(rd) << kRdShift | - so.encodingArm() << kShiftRegisterShift | - static_cast<int32_t>(opcode) << kShiftShift | - B4 | - static_cast<int32_t>(rm); - Emit(encoding); -} - - -void Arm32Assembler::EmitBranch(Condition cond, Label* label, bool link) { - if (label->IsBound()) { - EmitType5(cond, label->Position() - buffer_.Size(), link); - } else { - int position = buffer_.Size(); - // Use the offset field of the branch instruction for linking the sites. 
- EmitType5(cond, label->position_, link); - label->LinkTo(position); - } -} - - -void Arm32Assembler::clz(Register rd, Register rm, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - CHECK_NE(rd, PC); - CHECK_NE(rm, PC); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B24 | B22 | B21 | (0xf << 16) | - (static_cast<int32_t>(rd) << kRdShift) | - (0xf << 8) | B4 | static_cast<int32_t>(rm); - Emit(encoding); -} - - -void Arm32Assembler::movw(Register rd, uint16_t imm16, Condition cond) { - CHECK_NE(cond, kNoCondition); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - B25 | B24 | ((imm16 >> 12) << 16) | - static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff); - Emit(encoding); -} - - -void Arm32Assembler::movt(Register rd, uint16_t imm16, Condition cond) { - CHECK_NE(cond, kNoCondition); - int32_t encoding = static_cast<int32_t>(cond) << kConditionShift | - B25 | B24 | B22 | ((imm16 >> 12) << 16) | - static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff); - Emit(encoding); -} - - -void Arm32Assembler::EmitMiscellaneous(Condition cond, uint8_t op1, - uint8_t op2, uint32_t a_part, - uint32_t rest) { - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | B25 | B23 | - (op1 << 20) | - (a_part << 16) | - (op2 << 5) | - B4 | - rest; - Emit(encoding); -} - - -void Arm32Assembler::EmitReverseBytes(Register rd, Register rm, Condition cond, - uint8_t op1, uint8_t op2) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - CHECK_NE(rd, PC); - CHECK_NE(rm, PC); - - int32_t encoding = (static_cast<int32_t>(rd) << kRdShift) | - (0b1111 << 8) | - static_cast<int32_t>(rm); - EmitMiscellaneous(cond, op1, op2, 0b1111, encoding); -} - - -void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - CHECK_NE(rd, PC); - CHECK_NE(rm, PC); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B26 | B25 | B23 | B22 | B21 | B20 | (0xf << 16) | - (static_cast<int32_t>(rd) << kRdShift) | - (0xf << 8) | B5 | B4 | static_cast<int32_t>(rm); - Emit(encoding); -} - - -void Arm32Assembler::rev(Register rd, Register rm, Condition cond) { - EmitReverseBytes(rd, rm, cond, 0b011, 0b001); -} - - -void Arm32Assembler::rev16(Register rd, Register rm, Condition cond) { - EmitReverseBytes(rd, rm, cond, 0b011, 0b101); -} - - -void Arm32Assembler::revsh(Register rd, Register rm, Condition cond) { - EmitReverseBytes(rd, rm, cond, 0b111, 0b101); -} - - -void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode, - Register rd, Register rn, - Register rm, Register rs) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rm, kNoRegister); - CHECK_NE(rs, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = opcode | - (static_cast<int32_t>(cond) << kConditionShift) | - (static_cast<int32_t>(rn) << kRnShift) | - (static_cast<int32_t>(rd) << kRdShift) | - (static_cast<int32_t>(rs) << kRsShift) | - B7 | B4 | - (static_cast<int32_t>(rm) << kRmShift); - Emit(encoding); -} - - -void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) { - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B24 | - B23 | - L | - (static_cast<int32_t>(rn) << kLdExRnShift) | - (static_cast<int32_t>(rt) << kLdExRtShift) | - B11 | B10 
| B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0; - Emit(encoding); -} - - -void Arm32Assembler::ldrexd(Register rt, Register rt2, Register rn, Condition cond) { - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt, R14); - CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); - CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); - CHECK_NE(cond, kNoCondition); - - int32_t encoding = - (static_cast<uint32_t>(cond) << kConditionShift) | - B24 | B23 | B21 | B20 | - static_cast<uint32_t>(rn) << 16 | - static_cast<uint32_t>(rt) << 12 | - B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0; - Emit(encoding); -} - - -void Arm32Assembler::strex(Register rd, - Register rt, - Register rn, - Condition cond) { - CHECK_NE(rn, kNoRegister); - CHECK_NE(rd, kNoRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B24 | - B23 | - (static_cast<int32_t>(rn) << kStrExRnShift) | - (static_cast<int32_t>(rd) << kStrExRdShift) | - B11 | B10 | B9 | B8 | B7 | B4 | - (static_cast<int32_t>(rt) << kStrExRtShift); - Emit(encoding); -} - -void Arm32Assembler::strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond) { - CHECK_NE(rd, kNoRegister); - CHECK_NE(rn, kNoRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt, R14); - CHECK_NE(rd, rt); - CHECK_NE(rd, rt2); - CHECK_EQ(0u, static_cast<uint32_t>(rt) % 2); - CHECK_EQ(static_cast<uint32_t>(rt) + 1, static_cast<uint32_t>(rt2)); - CHECK_NE(cond, kNoCondition); - - int32_t encoding = - (static_cast<uint32_t>(cond) << kConditionShift) | - B24 | B23 | B21 | - static_cast<uint32_t>(rn) << 16 | - static_cast<uint32_t>(rd) << 12 | - B11 | B10 | B9 | B8 | B7 | B4 | - static_cast<uint32_t>(rt); - Emit(encoding); -} - - -void Arm32Assembler::clrex(Condition cond) { - CHECK_EQ(cond, AL); // This cannot be conditional on ARM. 
- int32_t encoding = (kSpecialCondition << kConditionShift) | - B26 | B24 | B22 | B21 | B20 | (0xff << 12) | B4 | 0xf; - Emit(encoding); -} - - -void Arm32Assembler::nop(Condition cond) { - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B25 | B24 | B21 | (0xf << 12); - Emit(encoding); -} - - -void Arm32Assembler::vmovsr(SRegister sn, Register rt, Condition cond) { - CHECK_NE(sn, kNoSRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | - ((static_cast<int32_t>(sn) >> 1)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | - ((static_cast<int32_t>(sn) & 1)*B7) | B4; - Emit(encoding); -} - - -void Arm32Assembler::vmovrs(Register rt, SRegister sn, Condition cond) { - CHECK_NE(sn, kNoSRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B20 | - ((static_cast<int32_t>(sn) >> 1)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | - ((static_cast<int32_t>(sn) & 1)*B7) | B4; - Emit(encoding); -} - - -void Arm32Assembler::vmovsrr(SRegister sm, Register rt, Register rt2, - Condition cond) { - CHECK_NE(sm, kNoSRegister); - CHECK_NE(sm, S31); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt2, SP); - CHECK_NE(rt2, PC); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B22 | - (static_cast<int32_t>(rt2)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | - ((static_cast<int32_t>(sm) & 1)*B5) | B4 | - (static_cast<int32_t>(sm) >> 1); - Emit(encoding); -} - - -void Arm32Assembler::vmovrrs(Register rt, Register rt2, SRegister sm, - Condition cond) { - CHECK_NE(sm, kNoSRegister); - CHECK_NE(sm, S31); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt2, SP); - CHECK_NE(rt2, PC); - CHECK_NE(rt, rt2); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B22 | B20 | - (static_cast<int32_t>(rt2)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | - ((static_cast<int32_t>(sm) & 1)*B5) | B4 | - (static_cast<int32_t>(sm) >> 1); - Emit(encoding); -} - - -void Arm32Assembler::vmovdrr(DRegister dm, Register rt, Register rt2, - Condition cond) { - CHECK_NE(dm, kNoDRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt2, SP); - CHECK_NE(rt2, PC); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B22 | - (static_cast<int32_t>(rt2)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 | - ((static_cast<int32_t>(dm) >> 4)*B5) | B4 | - (static_cast<int32_t>(dm) & 0xf); - Emit(encoding); -} - - -void Arm32Assembler::vmovrrd(Register rt, Register rt2, DRegister dm, - Condition cond) { - CHECK_NE(dm, kNoDRegister); - CHECK_NE(rt, kNoRegister); - CHECK_NE(rt, SP); - CHECK_NE(rt, PC); - CHECK_NE(rt2, kNoRegister); - CHECK_NE(rt2, SP); - CHECK_NE(rt2, PC); - CHECK_NE(rt, rt2); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B22 | B20 | - (static_cast<int32_t>(rt2)*B16) | - (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 | - 
((static_cast<int32_t>(dm) >> 4)*B5) | B4 | - (static_cast<int32_t>(dm) & 0xf); - Emit(encoding); -} - - -void Arm32Assembler::vldrs(SRegister sd, const Address& ad, Condition cond) { - const Address& addr = static_cast<const Address&>(ad); - CHECK_NE(sd, kNoSRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B24 | B20 | - ((static_cast<int32_t>(sd) & 1)*B22) | - ((static_cast<int32_t>(sd) >> 1)*B12) | - B11 | B9 | addr.vencoding(); - Emit(encoding); -} - - -void Arm32Assembler::vstrs(SRegister sd, const Address& ad, Condition cond) { - const Address& addr = static_cast<const Address&>(ad); - CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC); - CHECK_NE(sd, kNoSRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B24 | - ((static_cast<int32_t>(sd) & 1)*B22) | - ((static_cast<int32_t>(sd) >> 1)*B12) | - B11 | B9 | addr.vencoding(); - Emit(encoding); -} - - -void Arm32Assembler::vldrd(DRegister dd, const Address& ad, Condition cond) { - const Address& addr = static_cast<const Address&>(ad); - CHECK_NE(dd, kNoDRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B24 | B20 | - ((static_cast<int32_t>(dd) >> 4)*B22) | - ((static_cast<int32_t>(dd) & 0xf)*B12) | - B11 | B9 | B8 | addr.vencoding(); - Emit(encoding); -} - - -void Arm32Assembler::vstrd(DRegister dd, const Address& ad, Condition cond) { - const Address& addr = static_cast<const Address&>(ad); - CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC); - CHECK_NE(dd, kNoDRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B24 | - ((static_cast<int32_t>(dd) >> 4)*B22) | - ((static_cast<int32_t>(dd) & 0xf)*B12) | - B11 | B9 | B8 | addr.vencoding(); - Emit(encoding); -} - - -void Arm32Assembler::vpushs(SRegister reg, int nregs, Condition cond) { - EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, false, cond); -} - - -void Arm32Assembler::vpushd(DRegister reg, int nregs, Condition cond) { - EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, true, cond); -} - - -void Arm32Assembler::vpops(SRegister reg, int nregs, Condition cond) { - EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, false, cond); -} - - -void Arm32Assembler::vpopd(DRegister reg, int nregs, Condition cond) { - EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, true, cond); -} - - -void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) { - CHECK_NE(cond, kNoCondition); - CHECK_GT(nregs, 0); - uint32_t D; - uint32_t Vd; - if (dbl) { - // Encoded as D:Vd. - D = (reg >> 4) & 1; - Vd = reg & 15U /* 0b1111 */; - } else { - // Encoded as Vd:D. - D = reg & 1; - Vd = (reg >> 1) & 15U /* 0b1111 */; - } - int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 | - B11 | B9 | - (dbl ? B8 : 0) | - (push ? B24 : (B23 | B20)) | - static_cast<int32_t>(cond) << kConditionShift | - nregs << (dbl ? 
1 : 0) | - D << 22 | - Vd << 12; - Emit(encoding); -} - - -void Arm32Assembler::EmitVFPsss(Condition cond, int32_t opcode, - SRegister sd, SRegister sn, SRegister sm) { - CHECK_NE(sd, kNoSRegister); - CHECK_NE(sn, kNoSRegister); - CHECK_NE(sm, kNoSRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B11 | B9 | opcode | - ((static_cast<int32_t>(sd) & 1)*B22) | - ((static_cast<int32_t>(sn) >> 1)*B16) | - ((static_cast<int32_t>(sd) >> 1)*B12) | - ((static_cast<int32_t>(sn) & 1)*B7) | - ((static_cast<int32_t>(sm) & 1)*B5) | - (static_cast<int32_t>(sm) >> 1); - Emit(encoding); -} - - -void Arm32Assembler::EmitVFPddd(Condition cond, int32_t opcode, - DRegister dd, DRegister dn, DRegister dm) { - CHECK_NE(dd, kNoDRegister); - CHECK_NE(dn, kNoDRegister); - CHECK_NE(dm, kNoDRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B11 | B9 | B8 | opcode | - ((static_cast<int32_t>(dd) >> 4)*B22) | - ((static_cast<int32_t>(dn) & 0xf)*B16) | - ((static_cast<int32_t>(dd) & 0xf)*B12) | - ((static_cast<int32_t>(dn) >> 4)*B7) | - ((static_cast<int32_t>(dm) >> 4)*B5) | - (static_cast<int32_t>(dm) & 0xf); - Emit(encoding); -} - - -void Arm32Assembler::EmitVFPsd(Condition cond, int32_t opcode, - SRegister sd, DRegister dm) { - CHECK_NE(sd, kNoSRegister); - CHECK_NE(dm, kNoDRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B11 | B9 | opcode | - ((static_cast<int32_t>(sd) & 1)*B22) | - ((static_cast<int32_t>(sd) >> 1)*B12) | - ((static_cast<int32_t>(dm) >> 4)*B5) | - (static_cast<int32_t>(dm) & 0xf); - Emit(encoding); -} - - -void Arm32Assembler::EmitVFPds(Condition cond, int32_t opcode, - DRegister dd, SRegister sm) { - CHECK_NE(dd, kNoDRegister); - CHECK_NE(sm, kNoSRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B11 | B9 | opcode | - ((static_cast<int32_t>(dd) >> 4)*B22) | - ((static_cast<int32_t>(dd) & 0xf)*B12) | - ((static_cast<int32_t>(sm) & 1)*B5) | - (static_cast<int32_t>(sm) >> 1); - Emit(encoding); -} - - -void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm, - Condition cond, SetCc set_cc) { - CHECK_LE(shift_imm, 31u); - mov(rd, ShifterOperand(rm, LSL, shift_imm), cond, set_cc); -} - - -void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm, - Condition cond, SetCc set_cc) { - CHECK(1u <= shift_imm && shift_imm <= 32u); - if (shift_imm == 32) shift_imm = 0; // Comply to UAL syntax. - mov(rd, ShifterOperand(rm, LSR, shift_imm), cond, set_cc); -} - - -void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm, - Condition cond, SetCc set_cc) { - CHECK(1u <= shift_imm && shift_imm <= 32u); - if (shift_imm == 32) shift_imm = 0; // Comply to UAL syntax. 
- mov(rd, ShifterOperand(rm, ASR, shift_imm), cond, set_cc); -} - - -void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm, - Condition cond, SetCc set_cc) { - CHECK(1u <= shift_imm && shift_imm <= 31u); - mov(rd, ShifterOperand(rm, ROR, shift_imm), cond, set_cc); -} - -void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) { - mov(rd, ShifterOperand(rm, ROR, 0), cond, set_cc); -} - - -void Arm32Assembler::Lsl(Register rd, Register rm, Register rn, - Condition cond, SetCc set_cc) { - mov(rd, ShifterOperand(rm, LSL, rn), cond, set_cc); -} - - -void Arm32Assembler::Lsr(Register rd, Register rm, Register rn, - Condition cond, SetCc set_cc) { - mov(rd, ShifterOperand(rm, LSR, rn), cond, set_cc); -} - - -void Arm32Assembler::Asr(Register rd, Register rm, Register rn, - Condition cond, SetCc set_cc) { - mov(rd, ShifterOperand(rm, ASR, rn), cond, set_cc); -} - - -void Arm32Assembler::Ror(Register rd, Register rm, Register rn, - Condition cond, SetCc set_cc) { - mov(rd, ShifterOperand(rm, ROR, rn), cond, set_cc); -} - -void Arm32Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 | - (static_cast<int32_t>(PC)*B12) | - B11 | B9 | B4; - Emit(encoding); -} - - -void Arm32Assembler::svc(uint32_t imm24) { - CHECK(IsUint<24>(imm24)) << imm24; - int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24; - Emit(encoding); -} - - -void Arm32Assembler::bkpt(uint16_t imm16) { - int32_t encoding = (AL << kConditionShift) | B24 | B21 | - ((imm16 >> 4) << 8) | B6 | B5 | B4 | (imm16 & 0xf); - Emit(encoding); -} - - -void Arm32Assembler::blx(Register rm, Condition cond) { - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B24 | B21 | (0xfff << 8) | B5 | B4 | - (static_cast<int32_t>(rm) << kRmShift); - Emit(encoding); -} - - -void Arm32Assembler::bx(Register rm, Condition cond) { - CHECK_NE(rm, kNoRegister); - CHECK_NE(cond, kNoCondition); - int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) | - B24 | B21 | (0xfff << 8) | B4 | - (static_cast<int32_t>(rm) << kRmShift); - Emit(encoding); -} - - -void Arm32Assembler::Push(Register rd, Condition cond) { - str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond); -} - - -void Arm32Assembler::Pop(Register rd, Condition cond) { - ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond); -} - - -void Arm32Assembler::PushList(RegList regs, Condition cond) { - stm(DB_W, SP, regs, cond); -} - - -void Arm32Assembler::PopList(RegList regs, Condition cond) { - ldm(IA_W, SP, regs, cond); -} - - -void Arm32Assembler::Mov(Register rd, Register rm, Condition cond) { - if (rd != rm) { - mov(rd, ShifterOperand(rm), cond); - } -} - - -void Arm32Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int bound_pc = buffer_.Size(); - while (label->IsLinked()) { - int32_t position = label->Position(); - int32_t next = buffer_.Load<int32_t>(position); - int32_t encoded = Arm32Assembler::EncodeBranchOffset(bound_pc - position, next); - buffer_.Store<int32_t>(position, encoded); - label->position_ = Arm32Assembler::DecodeBranchOffset(next); - } - label->BindTo(bound_pc); -} - - -int32_t Arm32Assembler::EncodeBranchOffset(int offset, int32_t inst) { - // The offset is off by 8 due to the way the ARM CPUs read PC. 
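For concreteness, an illustrative round trip through the branch-offset arithmetic in EncodeBranchOffset below and DecodeBranchOffset that follows it (this sketch is not taken from the patched sources and assumes kBranchOffsetMask is the usual 24-bit ARM branch field, 0x00ffffff):
//   Forward branch, target 12 bytes ahead of the instruction:
//     encode: (12 - 8) >> 2                  -> field 0x000001
//     decode: ((0x000001 << 8) >> 6) + 8     -> 12
//   Backward branch, target 4 bytes behind:
//     encode: ((-4 - 8) >> 2) & 0x00ffffff   -> field 0xfffffd
//     decode: the << 8 followed by the arithmetic >> 6 sign-extends the
//             field back to -12, and adding 8 restores the original -4.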
- offset -= 8; - CHECK_ALIGNED(offset, 4); - CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset; - - // Properly preserve only the bits supported in the instruction. - offset >>= 2; - offset &= kBranchOffsetMask; - return (inst & ~kBranchOffsetMask) | offset; -} - - -int Arm32Assembler::DecodeBranchOffset(int32_t inst) { - // Sign-extend, left-shift by 2, then add 8. - return ((((inst & kBranchOffsetMask) << 8) >> 6) + 8); -} - - -uint32_t Arm32Assembler::GetAdjustedPosition(uint32_t old_position ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - -Literal* Arm32Assembler::NewLiteral(size_t size ATTRIBUTE_UNUSED, - const uint8_t* data ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - -void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, - Literal* literal ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - -void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, Register rt2 ATTRIBUTE_UNUSED, - Literal* literal ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - -void Arm32Assembler::LoadLiteral(SRegister sd ATTRIBUTE_UNUSED, - Literal* literal ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - -void Arm32Assembler::LoadLiteral(DRegister dd ATTRIBUTE_UNUSED, - Literal* literal ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); -} - - -void Arm32Assembler::AddConstant(Register rd, Register rn, int32_t value, - Condition cond, SetCc set_cc) { - if (value == 0 && set_cc != kCcSet) { - if (rd != rn) { - mov(rd, ShifterOperand(rn), cond, set_cc); - } - return; - } - // We prefer to select the shorter code sequence rather than selecting add for - // positive values and sub for negatives ones, which would slightly improve - // the readability of generated code for some constants. 
- ShifterOperand shifter_op; - if (ShifterOperandCanHoldArm32(value, &shifter_op)) { - add(rd, rn, shifter_op, cond, set_cc); - } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) { - sub(rd, rn, shifter_op, cond, set_cc); - } else { - CHECK(rn != IP); - if (ShifterOperandCanHoldArm32(~value, &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - add(rd, rn, ShifterOperand(IP), cond, set_cc); - } else if (ShifterOperandCanHoldArm32(~(-value), &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - sub(rd, rn, ShifterOperand(IP), cond, set_cc); - } else { - movw(IP, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(IP, value_high, cond); - } - add(rd, rn, ShifterOperand(IP), cond, set_cc); - } - } -} - -void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { - ShifterOperand shifter_op; - if (ShifterOperandCanHoldArm32(value, &shifter_op)) { - cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) { - cmn(rn, shifter_op, cond); - } else { - movw(IP, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(IP, value_high, cond); - } - cmp(rn, ShifterOperand(IP), cond); - } -} - -void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) { - ShifterOperand shifter_op; - if (ShifterOperandCanHoldArm32(value, &shifter_op)) { - mov(rd, shifter_op, cond); - } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) { - mvn(rd, shifter_op, cond); - } else { - movw(rd, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(rd, value_high, cond); - } - } -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldLoadOffsetArm. -void Arm32Assembler::LoadFromOffset(LoadOperandType type, - Register reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldLoadOffsetArm(type, offset)) { - CHECK(base != IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldLoadOffsetArm(type, offset)); - switch (type) { - case kLoadSignedByte: - ldrsb(reg, Address(base, offset), cond); - break; - case kLoadUnsignedByte: - ldrb(reg, Address(base, offset), cond); - break; - case kLoadSignedHalfword: - ldrsh(reg, Address(base, offset), cond); - break; - case kLoadUnsignedHalfword: - ldrh(reg, Address(base, offset), cond); - break; - case kLoadWord: - ldr(reg, Address(base, offset), cond); - break; - case kLoadWordPair: - ldrd(reg, Address(base, offset), cond); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset. -void Arm32Assembler::LoadSFromOffset(SRegister reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldLoadOffsetArm(kLoadSWord, offset)) { - CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldLoadOffsetArm(kLoadSWord, offset)); - vldrs(reg, Address(base, offset), cond); -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset. 
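As a rough illustration of the fallback chain in AddConstant and LoadImmediate above (not from the original file; the assembler pointer, registers, and constants are arbitrary examples), the emitted sequences would look roughly like this:
//   assembler->AddConstant(R0, R1, 0xff);        // add  r0, r1, #255   (value itself is an ARM operand)
//   assembler->AddConstant(R0, R1, -0xff);       // sub  r0, r1, #255   (negated value is encodable)
//   assembler->AddConstant(R0, R1, 0x12345678);  // movw ip, #0x5678
//                                                // movt ip, #0x1234
//                                                // add  r0, r1, ip     (full 32-bit path, clobbers IP)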
-void Arm32Assembler::LoadDFromOffset(DRegister reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldLoadOffsetArm(kLoadDWord, offset)) { - CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldLoadOffsetArm(kLoadDWord, offset)); - vldrd(reg, Address(base, offset), cond); -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldStoreOffsetArm. -void Arm32Assembler::StoreToOffset(StoreOperandType type, - Register reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldStoreOffsetArm(type, offset)) { - CHECK(reg != IP); - CHECK(base != IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldStoreOffsetArm(type, offset)); - switch (type) { - case kStoreByte: - strb(reg, Address(base, offset), cond); - break; - case kStoreHalfword: - strh(reg, Address(base, offset), cond); - break; - case kStoreWord: - str(reg, Address(base, offset), cond); - break; - case kStoreWordPair: - strd(reg, Address(base, offset), cond); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreToOffset. -void Arm32Assembler::StoreSToOffset(SRegister reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldStoreOffsetArm(kStoreSWord, offset)) { - CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldStoreOffsetArm(kStoreSWord, offset)); - vstrs(reg, Address(base, offset), cond); -} - - -// Implementation note: this method must emit at most one instruction when -// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreSToOffset. 
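Similarly, an illustrative sketch (again not from the patched sources) of the one-instruction contract for StoreToOffset above versus its IP-based fallback when the offset is out of range for a word store, assuming the usual ±4095 immediate range of ARM str:
//   assembler->StoreToOffset(kStoreWord, R0, SP, 4);     // str r0, [sp, #4]
//   assembler->StoreToOffset(kStoreWord, R0, SP, 4096);  // mov ip, #4096
//                                                        // add ip, ip, sp
//                                                        // str r0, [ip]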
-void Arm32Assembler::StoreDToOffset(DRegister reg, - Register base, - int32_t offset, - Condition cond) { - if (!Address::CanHoldStoreOffsetArm(kStoreDWord, offset)) { - CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; - } - CHECK(Address::CanHoldStoreOffsetArm(kStoreDWord, offset)); - vstrd(reg, Address(base, offset), cond); -} - - -void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) { - CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); - dmb(SY); -} - - -void Arm32Assembler::dmb(DmbOptions flavor) { - int32_t encoding = 0xf57ff05f; // dmb - Emit(encoding | flavor); -} - - -void Arm32Assembler::cbz(Register rn ATTRIBUTE_UNUSED, Label* target ATTRIBUTE_UNUSED) { - LOG(FATAL) << "cbz is not supported on ARM32"; -} - - -void Arm32Assembler::cbnz(Register rn ATTRIBUTE_UNUSED, Label* target ATTRIBUTE_UNUSED) { - LOG(FATAL) << "cbnz is not supported on ARM32"; -} - - -void Arm32Assembler::CompareAndBranchIfZero(Register r, Label* label) { - cmp(r, ShifterOperand(0)); - b(label, EQ); -} - - -void Arm32Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { - cmp(r, ShifterOperand(0)); - b(label, NE); -} - -JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED, - Register base_reg ATTRIBUTE_UNUSED) { - LOG(FATAL) << "CreateJumpTable is not supported on ARM32"; - UNREACHABLE(); -} - -void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED, - Register displacement_reg ATTRIBUTE_UNUSED) { - LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32"; - UNREACHABLE(); -} - -void Arm32Assembler::FinalizeCode() { - ArmAssembler::FinalizeCode(); - // Currently the arm32 assembler does not support fixups, and thus no tracking. We must not call - // FinalizeTrackedLabels(), which would lead to an abort. -} - -} // namespace arm -} // namespace art diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h deleted file mode 100644 index bc6020e008..0000000000 --- a/compiler/utils/arm/assembler_arm32.h +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_ -#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_ - -#include <vector> - -#include "base/logging.h" -#include "constants_arm.h" -#include "utils/arm/managed_register_arm.h" -#include "utils/arm/assembler_arm.h" -#include "offsets.h" - -namespace art { -namespace arm { - -class Arm32Assembler FINAL : public ArmAssembler { - public: - explicit Arm32Assembler(ArenaAllocator* arena) : ArmAssembler(arena) {} - virtual ~Arm32Assembler() {} - - bool IsThumb() const OVERRIDE { - return false; - } - - // Data-processing instructions. 
- virtual void and_(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void eor(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void sub(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void rsb(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void add(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void adc(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void sbc(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void rsc(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; - - void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; - - void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; - - void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; - - virtual void orr(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void orn(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void mov(Register rd, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void bic(Register rd, Register rn, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void mvn(Register rd, const ShifterOperand& so, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - // Miscellaneous data-processing instructions. - void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE; - void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; - void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE; - void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE; - void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE; - void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE; - void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE; - - // Multiply instructions. - void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; - void mla(Register rd, Register rn, Register rm, Register ra, - Condition cond = AL) OVERRIDE; - void mls(Register rd, Register rn, Register rm, Register ra, - Condition cond = AL) OVERRIDE; - void smull(Register rd_lo, Register rd_hi, Register rn, Register rm, - Condition cond = AL) OVERRIDE; - void umull(Register rd_lo, Register rd_hi, Register rn, Register rm, - Condition cond = AL) OVERRIDE; - - void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; - void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE; - - // Bit field extract instructions. - void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE; - void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE; - - // Load/store instructions. 
- void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - - void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - - void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - - void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - - void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; - - void ldm(BlockAddressMode am, Register base, - RegList regs, Condition cond = AL) OVERRIDE; - void stm(BlockAddressMode am, Register base, - RegList regs, Condition cond = AL) OVERRIDE; - - void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE; - void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE; - void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; - void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE; - - // Miscellaneous instructions. - void clrex(Condition cond = AL) OVERRIDE; - void nop(Condition cond = AL) OVERRIDE; - - // Note that gdb sets breakpoints using the undefined instruction 0xe7f001f0. - void bkpt(uint16_t imm16) OVERRIDE; - void svc(uint32_t imm24) OVERRIDE; - - void cbz(Register rn, Label* target) OVERRIDE; - void cbnz(Register rn, Label* target) OVERRIDE; - - // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles). - void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE; - void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE; - void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE; - void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE; - void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE; - void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE; - void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; - - // Returns false if the immediate cannot be encoded. 
- bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE; - bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE; - - void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE; - void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE; - void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE; - void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE; - - void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE; - void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE; - - void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; - void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; - void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; - - void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; - void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; - void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE; - void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE; - - void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE; - void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE; - void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE; - void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE; - void vmstat(Condition cond = AL) OVERRIDE; // VMRS APSR_nzcv, FPSCR - - void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; - void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; - void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; - void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; - - // Branch instructions. 
- void b(Label* label, Condition cond = AL) OVERRIDE; - void bl(Label* label, Condition cond = AL) OVERRIDE; - void blx(Register rm, Condition cond = AL) OVERRIDE; - void bx(Register rm, Condition cond = AL) OVERRIDE; - virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Asr(Register rd, Register rm, uint32_t shift_imm, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Ror(Register rd, Register rm, uint32_t shift_imm, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Rrx(Register rd, Register rm, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - virtual void Lsl(Register rd, Register rm, Register rn, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Lsr(Register rd, Register rm, Register rn, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Asr(Register rd, Register rm, Register rn, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - virtual void Ror(Register rd, Register rm, Register rn, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - void Push(Register rd, Condition cond = AL) OVERRIDE; - void Pop(Register rd, Condition cond = AL) OVERRIDE; - - void PushList(RegList regs, Condition cond = AL) OVERRIDE; - void PopList(RegList regs, Condition cond = AL) OVERRIDE; - - void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; - - void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; - void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; - - // Memory barriers. - void dmb(DmbOptions flavor) OVERRIDE; - - // Get the final position of a label after local fixup based on the old position - // recorded before FinalizeCode(). - uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; - - Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; - void LoadLiteral(Register rt, Literal* literal) OVERRIDE; - void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; - void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; - void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; - - // Add signed constant value to rd. May clobber IP. - void AddConstant(Register rd, Register rn, int32_t value, - Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; - - void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE; - - // Load and Store. May clobber IP. 
- void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; - void MarkExceptionHandler(Label* label) OVERRIDE; - void LoadFromOffset(LoadOperandType type, - Register reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - void StoreToOffset(StoreOperandType type, - Register reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - void LoadSFromOffset(SRegister reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - void StoreSToOffset(SRegister reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - void LoadDFromOffset(DRegister reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - void StoreDToOffset(DRegister reg, - Register base, - int32_t offset, - Condition cond = AL) OVERRIDE; - - bool ShifterOperandCanHold(Register rd, - Register rn, - Opcode opcode, - uint32_t immediate, - SetCc set_cc, - ShifterOperand* shifter_op) OVERRIDE; - using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. - - bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; - - static bool IsInstructionForExceptionHandling(uintptr_t pc); - - // Emit data (e.g. encoded instruction or immediate) to the - // instruction stream. - void Emit(int32_t value); - void Bind(Label* label) OVERRIDE; - - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - - JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; - void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE; - - void FinalizeCode() OVERRIDE; - - private: - void EmitType01(Condition cond, - int type, - Opcode opcode, - SetCc set_cc, - Register rn, - Register rd, - const ShifterOperand& so); - - void EmitType5(Condition cond, int offset, bool link); - - void EmitMemOp(Condition cond, - bool load, - bool byte, - Register rd, - const Address& ad); - - void EmitMemOpAddressMode3(Condition cond, - int32_t mode, - Register rd, - const Address& ad); - - void EmitMultiMemOp(Condition cond, - BlockAddressMode am, - bool load, - Register base, - RegList regs); - - void EmitShiftImmediate(Condition cond, - Shift opcode, - Register rd, - Register rm, - const ShifterOperand& so); - - void EmitShiftRegister(Condition cond, - Shift opcode, - Register rd, - Register rm, - const ShifterOperand& so); - - void EmitMulOp(Condition cond, - int32_t opcode, - Register rd, - Register rn, - Register rm, - Register rs); - - void EmitVFPsss(Condition cond, - int32_t opcode, - SRegister sd, - SRegister sn, - SRegister sm); - - void EmitVFPddd(Condition cond, - int32_t opcode, - DRegister dd, - DRegister dn, - DRegister dm); - - void EmitVFPsd(Condition cond, - int32_t opcode, - SRegister sd, - DRegister dm); - - void EmitVFPds(Condition cond, - int32_t opcode, - DRegister dd, - SRegister sm); - - void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); - - void EmitMiscellaneous(Condition cond, uint8_t op1, uint8_t op2, - uint32_t a_part, uint32_t rest); - void EmitReverseBytes(Register rd, Register rm, Condition cond, - uint8_t op1, uint8_t op2); - - void EmitBranch(Condition cond, Label* label, bool link); - static int32_t EncodeBranchOffset(int offset, int32_t inst); - static int DecodeBranchOffset(int32_t inst); - bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op); -}; - -} // namespace arm -} // namespace art - -#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_ diff --git a/compiler/utils/arm/assembler_arm32_test.cc 
b/compiler/utils/arm/assembler_arm32_test.cc deleted file mode 100644 index e570e22fca..0000000000 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ /dev/null @@ -1,902 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "assembler_arm32.h" - -#include <functional> -#include <type_traits> - -#include "base/macros.h" -#include "base/stl_util.h" -#include "utils/arm/assembler_arm_test.h" - -namespace art { - -using std::placeholders::_1; -using std::placeholders::_2; -using std::placeholders::_3; -using std::placeholders::_4; -using std::placeholders::_5; - -// To speed up tests, don't use all register combinations. -static constexpr bool kUseSparseRegisterList = true; - -// To speed up tests, don't use all condition codes. -static constexpr bool kUseSparseConditionList = true; - -// To speed up tests, don't use all shift immediates. -static constexpr bool kUseSparseShiftImmediates = true; - -class AssemblerArm32Test : public AssemblerArmTest<arm::Arm32Assembler, - arm::Register, arm::SRegister, - uint32_t, arm::ShifterOperand, arm::Condition, - arm::SetCc> { - protected: - std::string GetArchitectureString() OVERRIDE { - return "arm"; - } - - std::string GetAssemblerParameters() OVERRIDE { - // Arm-v7a, cortex-a15 (means we have sdiv). 
- return " -march=armv7-a -mcpu=cortex-a15 -mfpu=neon"; - } - - const char* GetAssemblyHeader() OVERRIDE { - return kArm32AssemblyHeader; - } - - std::string GetDisassembleParameters() OVERRIDE { - return " -D -bbinary -marm --no-show-raw-insn"; - } - - void SetUpHelpers() OVERRIDE { - if (registers_.size() == 0) { - if (kUseSparseRegisterList) { - registers_.insert(end(registers_), - { // NOLINT(whitespace/braces) - new arm::Register(arm::R0), - new arm::Register(arm::R1), - new arm::Register(arm::R4), - new arm::Register(arm::R8), - new arm::Register(arm::R11), - new arm::Register(arm::R12), - new arm::Register(arm::R13), - new arm::Register(arm::R14), - new arm::Register(arm::R15) - }); - } else { - registers_.insert(end(registers_), - { // NOLINT(whitespace/braces) - new arm::Register(arm::R0), - new arm::Register(arm::R1), - new arm::Register(arm::R2), - new arm::Register(arm::R3), - new arm::Register(arm::R4), - new arm::Register(arm::R5), - new arm::Register(arm::R6), - new arm::Register(arm::R7), - new arm::Register(arm::R8), - new arm::Register(arm::R9), - new arm::Register(arm::R10), - new arm::Register(arm::R11), - new arm::Register(arm::R12), - new arm::Register(arm::R13), - new arm::Register(arm::R14), - new arm::Register(arm::R15) - }); - } - } - - if (!kUseSparseConditionList) { - conditions_.push_back(arm::Condition::EQ); - conditions_.push_back(arm::Condition::NE); - conditions_.push_back(arm::Condition::CS); - conditions_.push_back(arm::Condition::CC); - conditions_.push_back(arm::Condition::MI); - conditions_.push_back(arm::Condition::PL); - conditions_.push_back(arm::Condition::VS); - conditions_.push_back(arm::Condition::VC); - conditions_.push_back(arm::Condition::HI); - conditions_.push_back(arm::Condition::LS); - conditions_.push_back(arm::Condition::GE); - conditions_.push_back(arm::Condition::LT); - conditions_.push_back(arm::Condition::GT); - conditions_.push_back(arm::Condition::LE); - conditions_.push_back(arm::Condition::AL); - } else { - conditions_.push_back(arm::Condition::EQ); - conditions_.push_back(arm::Condition::NE); - conditions_.push_back(arm::Condition::CC); - conditions_.push_back(arm::Condition::VC); - conditions_.push_back(arm::Condition::HI); - conditions_.push_back(arm::Condition::LT); - conditions_.push_back(arm::Condition::AL); - } - - set_ccs_.push_back(arm::kCcDontCare); - set_ccs_.push_back(arm::kCcSet); - set_ccs_.push_back(arm::kCcKeep); - - shifter_operands_.push_back(arm::ShifterOperand(0)); - shifter_operands_.push_back(arm::ShifterOperand(1)); - shifter_operands_.push_back(arm::ShifterOperand(2)); - shifter_operands_.push_back(arm::ShifterOperand(3)); - shifter_operands_.push_back(arm::ShifterOperand(4)); - shifter_operands_.push_back(arm::ShifterOperand(5)); - shifter_operands_.push_back(arm::ShifterOperand(127)); - shifter_operands_.push_back(arm::ShifterOperand(128)); - shifter_operands_.push_back(arm::ShifterOperand(254)); - shifter_operands_.push_back(arm::ShifterOperand(255)); - - if (!kUseSparseRegisterList) { - shifter_operands_.push_back(arm::ShifterOperand(arm::R0)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R1)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R2)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R3)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R4)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R5)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R6)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R7)); - 
shifter_operands_.push_back(arm::ShifterOperand(arm::R8)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R9)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R10)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R11)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R12)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R13)); - } else { - shifter_operands_.push_back(arm::ShifterOperand(arm::R0)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R1)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R4)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R8)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R11)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R12)); - shifter_operands_.push_back(arm::ShifterOperand(arm::R13)); - } - - std::vector<arm::Shift> shifts { - arm::Shift::LSL, arm::Shift::LSR, arm::Shift::ASR, arm::Shift::ROR, arm::Shift::RRX - }; - - // ShifterOperands of form "reg shift-type imm." - for (arm::Shift shift : shifts) { - for (arm::Register* reg : registers_) { // Note: this will pick up the sparse set. - if (*reg == arm::R15) { // Skip PC. - continue; - } - if (shift != arm::Shift::RRX) { - if (!kUseSparseShiftImmediates) { - for (uint32_t imm = 1; imm < 32; ++imm) { - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, imm)); - } - } else { - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 1)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 2)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 3)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 7)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 15)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 16)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 30)); - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 31)); - } - } else { - // RRX doesn't have an immediate. - shifter_operands_.push_back(arm::ShifterOperand(*reg, shift, 0)); - } - } - } - } - - std::vector<arm::ShifterOperand> CreateRegisterShifts(std::vector<arm::Register*>& base_regs, - int32_t shift_min, int32_t shift_max) { - std::vector<arm::ShifterOperand> res; - static constexpr arm::Shift kShifts[] = { arm::Shift::LSL, arm::Shift::LSR, arm::Shift::ASR, - arm::Shift::ROR }; - - for (arm::Shift shift : kShifts) { - for (arm::Register* reg : base_regs) { - // Take the min, the max, and three values in between. 
- res.push_back(arm::ShifterOperand(*reg, shift, shift_min)); - if (shift_min != shift_max) { - res.push_back(arm::ShifterOperand(*reg, shift, shift_max)); - int32_t middle = (shift_min + shift_max) / 2; - res.push_back(arm::ShifterOperand(*reg, shift, middle)); - res.push_back(arm::ShifterOperand(*reg, shift, middle - 1)); - res.push_back(arm::ShifterOperand(*reg, shift, middle + 1)); - } - } - } - - return res; - } - - void TearDown() OVERRIDE { - AssemblerArmTest::TearDown(); - STLDeleteElements(®isters_); - } - - std::vector<arm::Register*> GetRegisters() OVERRIDE { - return registers_; - } - - uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { - return imm_value; - } - - std::vector<arm::Condition>& GetConditions() OVERRIDE { - return conditions_; - } - - std::string GetConditionString(arm::Condition c) OVERRIDE { - std::ostringstream oss; - oss << c; - return oss.str(); - } - - std::vector<arm::SetCc>& GetSetCcs() OVERRIDE { - return set_ccs_; - } - - std::string GetSetCcString(arm::SetCc s) OVERRIDE { - // For arm32, kCcDontCare defaults to not setting condition codes. - return s == arm::kCcSet ? "s" : ""; - } - - arm::Register GetPCRegister() OVERRIDE { - return arm::R15; - } - - std::vector<arm::ShifterOperand>& GetShiftOperands() OVERRIDE { - return shifter_operands_; - } - - std::string GetShiftString(arm::ShifterOperand sop) OVERRIDE { - std::ostringstream oss; - if (sop.IsShift()) { - // Not a rotate... - if (sop.GetShift() == arm::Shift::RRX) { - oss << sop.GetRegister() << ", " << sop.GetShift(); - } else { - oss << sop.GetRegister() << ", " << sop.GetShift() << " #" << sop.GetImmediate(); - } - } else if (sop.IsRegister()) { - oss << sop.GetRegister(); - } else { - CHECK(sop.IsImmediate()); - oss << "#" << sop.GetImmediate(); - } - return oss.str(); - } - - static const char* GetRegTokenFromDepth(int depth) { - switch (depth) { - case 0: - return Base::REG1_TOKEN; - case 1: - return Base::REG2_TOKEN; - case 2: - return Base::REG3_TOKEN; - case 3: - return REG4_TOKEN; - default: - LOG(FATAL) << "Depth problem."; - UNREACHABLE(); - } - } - - void ExecuteAndPrint(std::function<void()> f, std::string fmt, std::ostringstream& oss) { - if (first_) { - first_ = false; - } else { - oss << "\n"; - } - oss << fmt; - - f(); - } - - // NOTE: Only support simple test like "aaa=bbb" - bool EvalFilterString(std::string filter) { - if (filter.compare("") == 0) { - return false; - } - - size_t equal_sign_index = filter.find('='); - if (equal_sign_index == std::string::npos) { - EXPECT_TRUE(false) << "Unsupported filter string."; - } - - std::string lhs = filter.substr(0, equal_sign_index); - std::string rhs = filter.substr(equal_sign_index + 1, std::string::npos); - return lhs.compare(rhs) == 0; - } - - void TemplateHelper(std::function<void(arm::Register)> f, int depth ATTRIBUTE_UNUSED, - bool without_pc, std::string fmt, std::string filter, - std::ostringstream& oss) { - std::vector<arm::Register*> registers = without_pc ? 
GetRegistersWithoutPC() : GetRegisters(); - for (auto reg : registers) { - std::string after_reg = fmt; - std::string after_reg_filter = filter; - - std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg); - size_t reg_index; - const char* reg_token = GetRegTokenFromDepth(depth); - - while ((reg_index = after_reg.find(reg_token)) != std::string::npos) { - after_reg.replace(reg_index, strlen(reg_token), reg_string); - } - - while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) { - after_reg_filter.replace(reg_index, strlen(reg_token), reg_string); - } - if (EvalFilterString(after_reg_filter)) { - continue; - } - - ExecuteAndPrint([&] () { f(*reg); }, after_reg, oss); - } - } - - void TemplateHelper(std::function<void(const arm::ShifterOperand&)> f, int depth ATTRIBUTE_UNUSED, - bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter, - std::ostringstream& oss) { - for (const arm::ShifterOperand& shift : GetShiftOperands()) { - std::string after_shift = fmt; - std::string after_shift_filter = filter; - - std::string shift_string = GetShiftString(shift); - size_t shift_index; - while ((shift_index = after_shift.find(SHIFT_TOKEN)) != std::string::npos) { - after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string); - } - - while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) { - after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string); - } - if (EvalFilterString(after_shift_filter)) { - continue; - } - - ExecuteAndPrint([&] () { f(shift); }, after_shift, oss); - } - } - - void TemplateHelper(std::function<void(arm::Condition)> f, int depth ATTRIBUTE_UNUSED, - bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter, - std::ostringstream& oss) { - for (arm::Condition c : GetConditions()) { - std::string after_cond = fmt; - std::string after_cond_filter = filter; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - cond_index = after_cond_filter.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - if (EvalFilterString(after_cond_filter)) { - continue; - } - - ExecuteAndPrint([&] () { f(c); }, after_cond, oss); - } - } - - void TemplateHelper(std::function<void(arm::SetCc)> f, int depth ATTRIBUTE_UNUSED, - bool without_pc ATTRIBUTE_UNUSED, std::string fmt, std::string filter, - std::ostringstream& oss) { - for (arm::SetCc s : GetSetCcs()) { - std::string after_cond = fmt; - std::string after_cond_filter = filter; - - size_t cond_index = after_cond.find(SET_CC_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s)); - } - - cond_index = after_cond_filter.find(SET_CC_TOKEN); - if (cond_index != std::string::npos) { - after_cond_filter.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s)); - } - if (EvalFilterString(after_cond_filter)) { - continue; - } - - ExecuteAndPrint([&] () { f(s); }, after_cond, oss); - } - } - - template <typename... Args> - void TemplateHelper(std::function<void(arm::Register, Args...)> f, int depth, bool without_pc, - std::string fmt, std::string filter, std::ostringstream& oss) { - std::vector<arm::Register*> registers = without_pc ? 
GetRegistersWithoutPC() : GetRegisters(); - for (auto reg : registers) { - std::string after_reg = fmt; - std::string after_reg_filter = filter; - - std::string reg_string = GetRegName<RegisterView::kUsePrimaryName>(*reg); - size_t reg_index; - const char* reg_token = GetRegTokenFromDepth(depth); - - while ((reg_index = after_reg.find(reg_token)) != std::string::npos) { - after_reg.replace(reg_index, strlen(reg_token), reg_string); - } - - while ((reg_index = after_reg_filter.find(reg_token)) != std::string::npos) { - after_reg_filter.replace(reg_index, strlen(reg_token), reg_string); - } - if (EvalFilterString(after_reg_filter)) { - continue; - } - - auto lambda = [&] (Args... args) { f(*reg, args...); }; // NOLINT [readability/braces] [4] - TemplateHelper(std::function<void(Args...)>(lambda), depth + 1, without_pc, - after_reg, after_reg_filter, oss); - } - } - - template <typename... Args> - void TemplateHelper(std::function<void(const arm::ShifterOperand&, Args...)> f, int depth, - bool without_pc, std::string fmt, std::string filter, - std::ostringstream& oss) { - for (const arm::ShifterOperand& shift : GetShiftOperands()) { - std::string after_shift = fmt; - std::string after_shift_filter = filter; - - std::string shift_string = GetShiftString(shift); - size_t shift_index; - while ((shift_index = after_shift.find(SHIFT_TOKEN)) != std::string::npos) { - after_shift.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string); - } - - while ((shift_index = after_shift_filter.find(SHIFT_TOKEN)) != std::string::npos) { - after_shift_filter.replace(shift_index, ConstexprStrLen(SHIFT_TOKEN), shift_string); - } - if (EvalFilterString(after_shift_filter)) { - continue; - } - - auto lambda = [&] (Args... args) { f(shift, args...); }; // NOLINT [readability/braces] [4] - TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc, - after_shift, after_shift_filter, oss); - } - } - - template <typename... Args> - void TemplateHelper(std::function<void(arm::Condition, Args...)> f, int depth, bool without_pc, - std::string fmt, std::string filter, std::ostringstream& oss) { - for (arm::Condition c : GetConditions()) { - std::string after_cond = fmt; - std::string after_cond_filter = filter; - - size_t cond_index = after_cond.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - - cond_index = after_cond_filter.find(COND_TOKEN); - if (cond_index != std::string::npos) { - after_cond_filter.replace(cond_index, ConstexprStrLen(COND_TOKEN), GetConditionString(c)); - } - if (EvalFilterString(after_cond_filter)) { - continue; - } - - auto lambda = [&] (Args... args) { f(c, args...); }; // NOLINT [readability/braces] [4] - TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc, - after_cond, after_cond_filter, oss); - } - } - - template <typename... 
Args> - void TemplateHelper(std::function<void(arm::SetCc, Args...)> f, int depth, bool without_pc, - std::string fmt, std::string filter, std::ostringstream& oss) { - for (arm::SetCc s : GetSetCcs()) { - std::string after_cond = fmt; - std::string after_cond_filter = filter; - - size_t cond_index = after_cond.find(SET_CC_TOKEN); - if (cond_index != std::string::npos) { - after_cond.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s)); - } - - cond_index = after_cond_filter.find(SET_CC_TOKEN); - if (cond_index != std::string::npos) { - after_cond_filter.replace(cond_index, ConstexprStrLen(SET_CC_TOKEN), GetSetCcString(s)); - } - if (EvalFilterString(after_cond_filter)) { - continue; - } - - auto lambda = [&] (Args... args) { f(s, args...); }; // NOLINT [readability/braces] [4] - TemplateHelper(std::function<void(Args...)>(lambda), depth, without_pc, - after_cond, after_cond_filter, oss); - } - } - - template <typename Assembler, typename T1, typename T2> - std::function<void(T1, T2)> GetBoundFunction2(void (Assembler::*f)(T1, T2)) { - return std::bind(f, GetAssembler(), _1, _2); - } - - template <typename Assembler, typename T1, typename T2, typename T3> - std::function<void(T1, T2, T3)> GetBoundFunction3(void (Assembler::*f)(T1, T2, T3)) { - return std::bind(f, GetAssembler(), _1, _2, _3); - } - - template <typename Assembler, typename T1, typename T2, typename T3, typename T4> - std::function<void(T1, T2, T3, T4)> GetBoundFunction4( - void (Assembler::*f)(T1, T2, T3, T4)) { - return std::bind(f, GetAssembler(), _1, _2, _3, _4); - } - - template <typename Assembler, typename T1, typename T2, typename T3, typename T4, typename T5> - std::function<void(T1, T2, T3, T4, T5)> GetBoundFunction5( - void (Assembler::*f)(T1, T2, T3, T4, T5)) { - return std::bind(f, GetAssembler(), _1, _2, _3, _4, _5); - } - - template <typename... Args> - void GenericTemplateHelper(std::function<void(Args...)> f, bool without_pc, - std::string fmt, std::string test_name, std::string filter) { - first_ = false; - WarnOnCombinations(CountHelper<Args...>(without_pc)); - - std::ostringstream oss; - - TemplateHelper(f, 0, without_pc, fmt, filter, oss); - - oss << "\n"; // Trailing newline. - - DriverStr(oss.str(), test_name); - } - - template <typename Assembler, typename... Args> - void T2Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt, - std::string test_name, std::string filter = "") { - GenericTemplateHelper(GetBoundFunction2(f), without_pc, fmt, test_name, filter); - } - - template <typename Assembler, typename... Args> - void T3Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt, - std::string test_name, std::string filter = "") { - GenericTemplateHelper(GetBoundFunction3(f), without_pc, fmt, test_name, filter); - } - - template <typename Assembler, typename... Args> - void T4Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt, - std::string test_name, std::string filter = "") { - GenericTemplateHelper(GetBoundFunction4(f), without_pc, fmt, test_name, filter); - } - - template <typename Assembler, typename... 
Args> - void T5Helper(void (Assembler::*f)(Args...), bool without_pc, std::string fmt, - std::string test_name, std::string filter = "") { - GenericTemplateHelper(GetBoundFunction5(f), without_pc, fmt, test_name, filter); - } - - private: - template <typename T> - size_t CountHelper(bool without_pc) { - size_t tmp; - if (std::is_same<T, arm::Register>::value) { - tmp = GetRegisters().size(); - if (without_pc) { - tmp--;; // Approximation... - } - return tmp; - } else if (std::is_same<T, const arm::ShifterOperand&>::value) { - return GetShiftOperands().size(); - } else if (std::is_same<T, arm::Condition>::value) { - return GetConditions().size(); - } else { - LOG(WARNING) << "Unknown type while counting."; - return 1; - } - } - - template <typename T1, typename T2, typename... Args> - size_t CountHelper(bool without_pc) { - size_t tmp; - if (std::is_same<T1, arm::Register>::value) { - tmp = GetRegisters().size(); - if (without_pc) { - tmp--;; // Approximation... - } - } else if (std::is_same<T1, const arm::ShifterOperand&>::value) { - tmp = GetShiftOperands().size(); - } else if (std::is_same<T1, arm::Condition>::value) { - tmp = GetConditions().size(); - } else { - LOG(WARNING) << "Unknown type while counting."; - tmp = 1; - } - size_t rec = CountHelper<T2, Args...>(without_pc); - return rec * tmp; - } - - bool first_; - - static constexpr const char* kArm32AssemblyHeader = ".arm\n"; - - std::vector<arm::Register*> registers_; - std::vector<arm::Condition> conditions_; - std::vector<arm::SetCc> set_ccs_; - std::vector<arm::ShifterOperand> shifter_operands_; -}; - - -TEST_F(AssemblerArm32Test, Toolchain) { - EXPECT_TRUE(CheckTools()); -} - -TEST_F(AssemblerArm32Test, Sbfx) { - std::vector<std::pair<uint32_t, uint32_t>> immediates; - immediates.push_back({0, 1}); - immediates.push_back({0, 8}); - immediates.push_back({0, 15}); - immediates.push_back({0, 16}); - immediates.push_back({0, 31}); - immediates.push_back({0, 32}); - - immediates.push_back({1, 1}); - immediates.push_back({1, 15}); - immediates.push_back({1, 31}); - - immediates.push_back({8, 1}); - immediates.push_back({8, 15}); - immediates.push_back({8, 16}); - immediates.push_back({8, 24}); - - immediates.push_back({31, 1}); - - DriverStr(RepeatRRiiC(&arm::Arm32Assembler::sbfx, immediates, - "sbfx{cond} {reg1}, {reg2}, #{imm1}, #{imm2}"), "sbfx"); -} - -TEST_F(AssemblerArm32Test, Ubfx) { - std::vector<std::pair<uint32_t, uint32_t>> immediates; - immediates.push_back({0, 1}); - immediates.push_back({0, 8}); - immediates.push_back({0, 15}); - immediates.push_back({0, 16}); - immediates.push_back({0, 31}); - immediates.push_back({0, 32}); - - immediates.push_back({1, 1}); - immediates.push_back({1, 15}); - immediates.push_back({1, 31}); - - immediates.push_back({8, 1}); - immediates.push_back({8, 15}); - immediates.push_back({8, 16}); - immediates.push_back({8, 24}); - - immediates.push_back({31, 1}); - - DriverStr(RepeatRRiiC(&arm::Arm32Assembler::ubfx, immediates, - "ubfx{cond} {reg1}, {reg2}, #{imm1}, #{imm2}"), "ubfx"); -} - -TEST_F(AssemblerArm32Test, Mul) { - T4Helper(&arm::Arm32Assembler::mul, true, "mul{cond} {reg1}, {reg2}, {reg3}", "mul"); -} - -TEST_F(AssemblerArm32Test, Mla) { - T5Helper(&arm::Arm32Assembler::mla, true, "mla{cond} {reg1}, {reg2}, {reg3}, {reg4}", "mla"); -} - -TEST_F(AssemblerArm32Test, Umull) { - T5Helper(&arm::Arm32Assembler::umull, true, "umull{cond} {reg1}, {reg2}, {reg3}, {reg4}", - "umull", "{reg1}={reg2}"); // Skip the cases where reg1 == reg2. 
-} - -TEST_F(AssemblerArm32Test, Smull) { - T5Helper(&arm::Arm32Assembler::smull, true, "smull{cond} {reg1}, {reg2}, {reg3}, {reg4}", - "smull", "{reg1}={reg2}"); // Skip the cases where reg1 == reg2. -} - -TEST_F(AssemblerArm32Test, Sdiv) { - T4Helper(&arm::Arm32Assembler::sdiv, true, "sdiv{cond} {reg1}, {reg2}, {reg3}", "sdiv"); -} - -TEST_F(AssemblerArm32Test, Udiv) { - T4Helper(&arm::Arm32Assembler::udiv, true, "udiv{cond} {reg1}, {reg2}, {reg3}", "udiv"); -} - -TEST_F(AssemblerArm32Test, And) { - T5Helper(&arm::Arm32Assembler::and_, true, "and{cond}{s} {reg1}, {reg2}, {shift}", "and"); -} - -TEST_F(AssemblerArm32Test, Ands) { - T4Helper(&arm::Arm32Assembler::ands, true, "and{cond}s {reg1}, {reg2}, {shift}", "ands"); -} - -TEST_F(AssemblerArm32Test, Eor) { - T5Helper(&arm::Arm32Assembler::eor, true, "eor{cond}{s} {reg1}, {reg2}, {shift}", "eor"); -} - -TEST_F(AssemblerArm32Test, Eors) { - T4Helper(&arm::Arm32Assembler::eors, true, "eor{cond}s {reg1}, {reg2}, {shift}", "eors"); -} - -TEST_F(AssemblerArm32Test, Orr) { - T5Helper(&arm::Arm32Assembler::orr, true, "orr{cond}{s} {reg1}, {reg2}, {shift}", "orr"); -} - -TEST_F(AssemblerArm32Test, Orrs) { - T4Helper(&arm::Arm32Assembler::orrs, true, "orr{cond}s {reg1}, {reg2}, {shift}", "orrs"); -} - -TEST_F(AssemblerArm32Test, Bic) { - T5Helper(&arm::Arm32Assembler::bic, true, "bic{cond}{s} {reg1}, {reg2}, {shift}", "bic"); -} - -TEST_F(AssemblerArm32Test, Bics) { - T4Helper(&arm::Arm32Assembler::bics, true, "bic{cond}s {reg1}, {reg2}, {shift}", "bics"); -} - -TEST_F(AssemblerArm32Test, Mov) { - T4Helper(&arm::Arm32Assembler::mov, true, "mov{cond}{s} {reg1}, {shift}", "mov"); -} - -TEST_F(AssemblerArm32Test, Movs) { - T3Helper(&arm::Arm32Assembler::movs, true, "mov{cond}s {reg1}, {shift}", "movs"); -} - -TEST_F(AssemblerArm32Test, Mvn) { - T4Helper(&arm::Arm32Assembler::mvn, true, "mvn{cond}{s} {reg1}, {shift}", "mvn"); -} - -TEST_F(AssemblerArm32Test, Mvns) { - T3Helper(&arm::Arm32Assembler::mvns, true, "mvn{cond}s {reg1}, {shift}", "mvns"); -} - -TEST_F(AssemblerArm32Test, Add) { - T5Helper(&arm::Arm32Assembler::add, false, "add{cond}{s} {reg1}, {reg2}, {shift}", "add"); -} - -TEST_F(AssemblerArm32Test, Adds) { - T4Helper(&arm::Arm32Assembler::adds, false, "add{cond}s {reg1}, {reg2}, {shift}", "adds"); -} - -TEST_F(AssemblerArm32Test, Adc) { - T5Helper(&arm::Arm32Assembler::adc, false, "adc{cond}{s} {reg1}, {reg2}, {shift}", "adc"); -} - -TEST_F(AssemblerArm32Test, Adcs) { - T4Helper(&arm::Arm32Assembler::adcs, false, "adc{cond}s {reg1}, {reg2}, {shift}", "adcs"); -} - -TEST_F(AssemblerArm32Test, Sub) { - T5Helper(&arm::Arm32Assembler::sub, false, "sub{cond}{s} {reg1}, {reg2}, {shift}", "sub"); -} - -TEST_F(AssemblerArm32Test, Subs) { - T4Helper(&arm::Arm32Assembler::subs, false, "sub{cond}s {reg1}, {reg2}, {shift}", "subs"); -} - -TEST_F(AssemblerArm32Test, Sbc) { - T5Helper(&arm::Arm32Assembler::sbc, false, "sbc{cond}{s} {reg1}, {reg2}, {shift}", "sbc"); -} - -TEST_F(AssemblerArm32Test, Sbcs) { - T4Helper(&arm::Arm32Assembler::sbcs, false, "sbc{cond}s {reg1}, {reg2}, {shift}", "sbcs"); -} - -TEST_F(AssemblerArm32Test, Rsb) { - T5Helper(&arm::Arm32Assembler::rsb, true, "rsb{cond}{s} {reg1}, {reg2}, {shift}", "rsb"); -} - -TEST_F(AssemblerArm32Test, Rsbs) { - T4Helper(&arm::Arm32Assembler::rsbs, true, "rsb{cond}s {reg1}, {reg2}, {shift}", "rsbs"); -} - -TEST_F(AssemblerArm32Test, Rsc) { - T5Helper(&arm::Arm32Assembler::rsc, true, "rsc{cond}{s} {reg1}, {reg2}, {shift}", "rsc"); -} - -TEST_F(AssemblerArm32Test, Rscs) { - 
T4Helper(&arm::Arm32Assembler::rscs, false, "rsc{cond}s {reg1}, {reg2}, {shift}", "rscs"); -} - -/* TODO: Need better filter support. -TEST_F(AssemblerArm32Test, Strex) { - T4Helper(&arm::Arm32Assembler::strex, "strex{cond} {reg1}, {reg2}, [{reg3}]", "strex", - "{reg1}={reg2}||{reg1}={reg3}"); // Skip the cases where reg1 == reg2 || reg1 == reg3. -} -*/ - -TEST_F(AssemblerArm32Test, Clz) { - T3Helper(&arm::Arm32Assembler::clz, true, "clz{cond} {reg1}, {reg2}", "clz"); -} - -TEST_F(AssemblerArm32Test, Tst) { - T3Helper(&arm::Arm32Assembler::tst, true, "tst{cond} {reg1}, {shift}", "tst"); -} - -TEST_F(AssemblerArm32Test, Teq) { - T3Helper(&arm::Arm32Assembler::teq, true, "teq{cond} {reg1}, {shift}", "teq"); -} - -TEST_F(AssemblerArm32Test, Cmp) { - T3Helper(&arm::Arm32Assembler::cmp, true, "cmp{cond} {reg1}, {shift}", "cmp"); -} - -TEST_F(AssemblerArm32Test, Cmn) { - T3Helper(&arm::Arm32Assembler::cmn, true, "cmn{cond} {reg1}, {shift}", "cmn"); -} - -TEST_F(AssemblerArm32Test, Blx) { - T2Helper(&arm::Arm32Assembler::blx, true, "blx{cond} {reg1}", "blx"); -} - -TEST_F(AssemblerArm32Test, Bx) { - T2Helper(&arm::Arm32Assembler::bx, true, "bx{cond} {reg1}", "bx"); -} - -TEST_F(AssemblerArm32Test, Vmstat) { - GetAssembler()->vmstat(); - - const char* expected = "vmrs APSR_nzcv, FPSCR\n"; - - DriverStr(expected, "vmrs"); -} - -TEST_F(AssemblerArm32Test, ldrexd) { - GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R0); - GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R1); - GetAssembler()->ldrexd(arm::R0, arm::R1, arm::R2); - - const char* expected = - "ldrexd r0, r1, [r0]\n" - "ldrexd r0, r1, [r1]\n" - "ldrexd r0, r1, [r2]\n"; - DriverStr(expected, "ldrexd"); -} - -TEST_F(AssemblerArm32Test, strexd) { - GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R0); - GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R1); - GetAssembler()->strexd(arm::R9, arm::R0, arm::R1, arm::R2); - - const char* expected = - "strexd r9, r0, r1, [r0]\n" - "strexd r9, r0, r1, [r1]\n" - "strexd r9, r0, r1, [r2]\n"; - DriverStr(expected, "strexd"); -} - -TEST_F(AssemblerArm32Test, rbit) { - T3Helper(&arm::Arm32Assembler::rbit, true, "rbit{cond} {reg1}, {reg2}", "rbit"); -} - -TEST_F(AssemblerArm32Test, rev) { - T3Helper(&arm::Arm32Assembler::rev, true, "rev{cond} {reg1}, {reg2}", "rev"); -} - -TEST_F(AssemblerArm32Test, rev16) { - T3Helper(&arm::Arm32Assembler::rev16, true, "rev16{cond} {reg1}, {reg2}", "rev16"); -} - -TEST_F(AssemblerArm32Test, revsh) { - T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh"); -} - -} // namespace art diff --git a/compiler/utils/arm/assembler_arm_shared.h b/compiler/utils/arm/assembler_arm_shared.h new file mode 100644 index 0000000000..21f13eeab7 --- /dev/null +++ b/compiler/utils/arm/assembler_arm_shared.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ +#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ + +namespace art { +namespace arm { + +enum LoadOperandType { + kLoadSignedByte, + kLoadUnsignedByte, + kLoadSignedHalfword, + kLoadUnsignedHalfword, + kLoadWord, + kLoadWordPair, + kLoadSWord, + kLoadDWord +}; + +enum StoreOperandType { + kStoreByte, + kStoreHalfword, + kStoreWord, + kStoreWordPair, + kStoreSWord, + kStoreDWord +}; + +// Set condition codes request. +enum SetCc { + kCcDontCare, // Allows prioritizing 16-bit instructions on Thumb2 whether they set CCs or not. + kCcSet, + kCcKeep, +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_SHARED_H_ diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc new file mode 100644 index 0000000000..6afc3ddecb --- /dev/null +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -0,0 +1,494 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> +#include <type_traits> + +#include "assembler_arm_vixl.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "thread.h" + +using namespace vixl::aarch32; // NOLINT(build/namespaces) + +using vixl::ExactAssemblyScope; +using vixl::CodeBufferCheckScope; + +namespace art { +namespace arm { + +#ifdef ___ +#error "ARM Assembler macro already defined." +#else +#define ___ vixl_masm_. +#endif + +extern const vixl32::Register tr(TR); + +void ArmVIXLAssembler::FinalizeCode() { + vixl_masm_.FinalizeCode(); +} + +size_t ArmVIXLAssembler::CodeSize() const { + return vixl_masm_.GetSizeOfCodeGenerated(); +} + +const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const { + return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); +} + +void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) { + // Copy the instructions from the buffer. + MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); + region.CopyFrom(0, from); +} + +void ArmVIXLAssembler::PoisonHeapReference(vixl::aarch32::Register reg) { + // reg = -reg. + ___ Rsb(reg, reg, 0); +} + +void ArmVIXLAssembler::UnpoisonHeapReference(vixl::aarch32::Register reg) { + // reg = -reg. + ___ Rsb(reg, reg, 0); +} + +void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } +} + +void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } +} + +void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) { + // TODO(VIXL): Implement this optimization in VIXL. 
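// Illustrative aside, not part of the patch: the branch just below prefers MVN of the
// bitwise complement when the value itself is not encodable as a Thumb-2 modified
// immediate but its complement is, saving a MOV/MOVT pair. A minimal standalone sketch
// of that decision; IsEncodable() is a deliberately tiny stand-in for VIXL's
// IsModifiedImmediate(), not the real predicate.
#include <cstdint>
#include <cstdio>

static bool IsEncodable(uint32_t imm) { return imm <= 0xffu; }  // Stand-in check.

static const char* PickLoadImmediate(uint32_t value) {
  if (!IsEncodable(value) && IsEncodable(~value)) {
    return "mvn";  // Emit MVN rd, #~value (one instruction).
  }
  return "mov";    // Emit MOV rd, #value (the assembler may expand it).
}

int main() {
  std::printf("%s\n", PickLoadImmediate(0x000000ffu));  // mov
  std::printf("%s\n", PickLoadImmediate(0xffffff00u));  // mvn, since ~value == 0xff
  return 0;
}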
+ if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) { + ___ Mvn(rd, ~value); + } else { + ___ Mov(rd, value); + } +} + +bool ArmVIXLAssembler::ShifterOperandCanAlwaysHold(uint32_t immediate) { + return vixl_masm_.IsModifiedImmediate(immediate); +} + +bool ArmVIXLAssembler::ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc) { + switch (opcode) { + case ADD: + case SUB: + // Less than (or equal to) 12 bits can be done if we don't need to set condition codes. + if (IsUint<12>(immediate) && set_cc != kCcSet) { + return true; + } + return ShifterOperandCanAlwaysHold(immediate); + + case MOV: + // TODO: Support less than or equal to 12bits. + return ShifterOperandCanAlwaysHold(immediate); + + case MVN: + default: + return ShifterOperandCanAlwaysHold(immediate); + } +} + +bool ArmVIXLAssembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store) { + int32_t other_bits = offset & ~allowed_offset_bits; + if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) { + *add_to_base = offset & ~allowed_offset_bits; + *offset_for_load_store = offset & allowed_offset_bits; + return true; + } + return false; +} + +int32_t ArmVIXLAssembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits, + vixl32::Register temp, + vixl32::Register base, + int32_t offset) { + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + ___ Add(temp, base, add_to_base); + return offset_for_load; + } else { + ___ Mov(temp, offset); + ___ Add(temp, temp, base); + return 0; + } +} + +// TODO(VIXL): Implement this in VIXL. +int32_t ArmVIXLAssembler::GetAllowedLoadOffsetBits(LoadOperandType type) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + // We can encode imm12 offset. + return 0xfff; + case kLoadSWord: + case kLoadDWord: + case kLoadWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +// TODO(VIXL): Implement this in VIXL. +int32_t ArmVIXLAssembler::GetAllowedStoreOffsetBits(StoreOperandType type) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + // We can encode imm12 offset. + return 0xfff; + case kStoreSWord: + case kStoreDWord: + case kStoreWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +// TODO(VIXL): Implement this in VIXL. +static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + return IsAbsoluteUint<12>(offset); + case kLoadSWord: + case kLoadDWord: + return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset); // VFP addressing mode. + case kLoadWordPair: + return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset); + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +// TODO(VIXL): Implement this in VIXL. 
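// Illustrative aside, not part of the patch: the offset handling above splits a
// too-large offset into a part that is added to the base register up front and a
// remainder that still fits the instruction's immediate field. The masks mirror
// GetAllowedLoadOffsetBits(); the check that the "add" part is itself encodable is
// omitted here for brevity.
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr int32_t kWordOffsetBits = 0xfff;         // LDR/STR: 12-bit immediate.
constexpr int32_t kVfpPairOffsetBits = 0xff << 2;  // VLDR/LDRD: 8-bit immediate scaled by 4.

struct SplitOffset {
  int32_t add_to_base;        // Added to the base register first.
  int32_t offset_for_access;  // Encoded directly in the load/store.
};

static SplitOffset Split(int32_t offset, int32_t allowed_bits) {
  SplitOffset s;
  s.add_to_base = offset & ~allowed_bits;
  s.offset_for_access = offset & allowed_bits;
  assert(s.add_to_base + s.offset_for_access == offset);
  return s;
}

int main() {
  SplitOffset s = Split(0x1234, kWordOffsetBits);  // ADD #0x1000, then [base, #0x234].
  std::printf("add %#x, access %#x\n", static_cast<unsigned>(s.add_to_base),
              static_cast<unsigned>(s.offset_for_access));
  s = Split(0x508, kVfpPairOffsetBits);            // ADD #0x400, then [base, #0x108].
  std::printf("add %#x, access %#x\n", static_cast<unsigned>(s.add_to_base),
              static_cast<unsigned>(s.offset_for_access));
  return 0;
}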
+static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + return IsAbsoluteUint<12>(offset); + case kStoreSWord: + case kStoreDWord: + return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset); // VFP addressing mode. + case kStoreWordPair: + return IsAbsoluteUint<10>(offset) && IsAligned<4>(offset); + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +// Implementation note: this method must emit at most one instruction when +// Address::CanHoldStoreOffsetThumb. +// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL. +void ArmVIXLAssembler::StoreToOffset(StoreOperandType type, + vixl32::Register reg, + vixl32::Register base, + int32_t offset) { + vixl32::Register tmp_reg; + UseScratchRegisterScope temps(&vixl_masm_); + + if (!CanHoldStoreOffsetThumb(type, offset)) { + CHECK_NE(base.GetCode(), kIpCode); + if ((reg.GetCode() != kIpCode) && + (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) && + ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) { + tmp_reg = temps.Acquire(); + } else { + // Be careful not to use ip twice (for `reg` (or `reg` + 1 in + // the case of a word-pair store) and `base`) to build the + // Address object used by the store instruction(s) below. + // Instead, save R5 on the stack (or R6 if R5 is already used by + // `base`), use it as secondary temporary register, and restore + // it after the store instruction has been emitted. + tmp_reg = (base.GetCode() != 5) ? r5 : r6; + ___ Push(tmp_reg); + if (base.GetCode() == kSpCode) { + offset += kRegisterSize; + } + } + // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset() + // and in the "unsplittable" path get rid of the "add" by using the store indexed instead. + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset); + base = tmp_reg; + } + DCHECK(CanHoldStoreOffsetThumb(type, offset)); + switch (type) { + case kStoreByte: + ___ Strb(reg, MemOperand(base, offset)); + break; + case kStoreHalfword: + ___ Strh(reg, MemOperand(base, offset)); + break; + case kStoreWord: + ___ Str(reg, MemOperand(base, offset)); + break; + case kStoreWordPair: + ___ Strd(reg, vixl32::Register(reg.GetCode() + 1), MemOperand(base, offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } + if ((tmp_reg.IsValid()) && (tmp_reg.GetCode() != kIpCode)) { + CHECK(tmp_reg.Is(r5) || tmp_reg.Is(r6)) << tmp_reg; + ___ Pop(tmp_reg); + } +} + +// Implementation note: this method must emit at most one instruction when +// Address::CanHoldLoadOffsetThumb. +// TODO(VIXL): Implement AdjustLoadStoreOffset logic in VIXL. +void ArmVIXLAssembler::LoadFromOffset(LoadOperandType type, + vixl32::Register dest, + vixl32::Register base, + int32_t offset) { + if (!CanHoldLoadOffsetThumb(type, offset)) { + CHECK(!base.Is(ip)); + // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks. + int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type); + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + AddConstant(dest, base, add_to_base); + base = dest; + offset = offset_for_load; + } else { + UseScratchRegisterScope temps(&vixl_masm_); + vixl32::Register temp = (dest.Is(base)) ? 
temps.Acquire() : dest; + LoadImmediate(temp, offset); + // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD. + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + ___ Add(dest, dest, (dest.Is(base)) ? temp : base); + base = dest; + offset = 0; + } + } + + DCHECK(CanHoldLoadOffsetThumb(type, offset)); + switch (type) { + case kLoadSignedByte: + ___ Ldrsb(dest, MemOperand(base, offset)); + break; + case kLoadUnsignedByte: + ___ Ldrb(dest, MemOperand(base, offset)); + break; + case kLoadSignedHalfword: + ___ Ldrsh(dest, MemOperand(base, offset)); + break; + case kLoadUnsignedHalfword: + ___ Ldrh(dest, MemOperand(base, offset)); + break; + case kLoadWord: + CHECK(!dest.IsSP()); + ___ Ldr(dest, MemOperand(base, offset)); + break; + case kLoadWordPair: + ___ Ldrd(dest, vixl32::Register(dest.GetCode() + 1), MemOperand(base, offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +void ArmVIXLAssembler::StoreSToOffset(vixl32::SRegister source, + vixl32::Register base, + int32_t offset) { + ___ Vstr(source, MemOperand(base, offset)); +} + +void ArmVIXLAssembler::StoreDToOffset(vixl32::DRegister source, + vixl32::Register base, + int32_t offset) { + ___ Vstr(source, MemOperand(base, offset)); +} + +void ArmVIXLAssembler::LoadSFromOffset(vixl32::SRegister reg, + vixl32::Register base, + int32_t offset) { + ___ Vldr(reg, MemOperand(base, offset)); +} + +void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg, + vixl32::Register base, + int32_t offset) { + ___ Vldr(reg, MemOperand(base, offset)); +} + +// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and +// ArmVIXLAssembler::LoadRegisterList where this generates less code (size). +static constexpr int kRegListThreshold = 4; + +void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) { + int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs)); + if (number_of_regs != 0) { + if (number_of_regs > kRegListThreshold) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + DCHECK_EQ(regs & (1u << base.GetCode()), 0u); + ___ Add(base, sp, Operand::From(stack_offset)); + } + ___ Stm(base, NO_WRITE_BACK, RegisterList(regs)); + } else { + for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) { + ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset)); + stack_offset += kRegSizeInBytes; + } + } + } +} + +void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) { + int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs)); + if (number_of_regs != 0) { + if (number_of_regs > kRegListThreshold) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + ___ Add(base, sp, Operand::From(stack_offset)); + } + ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs)); + } else { + for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) { + ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset)); + stack_offset += kRegSizeInBytes; + } + } + } +} + +void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) { + AddConstant(rd, rd, value); +} + +// TODO(VIXL): think about using adds which updates flags where possible. +void ArmVIXLAssembler::AddConstant(vixl32::Register rd, + vixl32::Register rn, + int32_t value) { + DCHECK(vixl_masm_.OutsideITBlock()); + // TODO(VIXL): implement this optimization in VIXL. 
+ if (value == 0) { + if (!rd.Is(rn)) { + ___ Mov(rd, rn); + } + return; + } + ___ Add(rd, rn, value); +} + +// Inside IT block we must use assembler, macroassembler instructions are not permitted. +void ArmVIXLAssembler::AddConstantInIt(vixl32::Register rd, + vixl32::Register rn, + int32_t value, + vixl32::Condition cond) { + DCHECK(vixl_masm_.InITBlock()); + if (value == 0) { + ___ mov(cond, rd, rn); + } else { + ___ add(cond, rd, rn, value); + } +} + +void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn, + vixl32::Label* label, + bool is_far_target) { + if (!is_far_target && rn.IsLow() && !label->IsBound()) { + // In T32, Cbz/Cbnz instructions have following limitations: + // - There are only 7 bits (i:imm5:0) to encode branch target address (cannot be far target). + // - Only low registers (i.e R0 .. R7) can be encoded. + // - Only forward branches (unbound labels) are supported. + Cbz(rn, label); + return; + } + Cmp(rn, 0); + B(eq, label, is_far_target); +} + +void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn, + vixl32::Label* label, + bool is_far_target) { + if (!is_far_target && rn.IsLow() && !label->IsBound()) { + Cbnz(rn, label); + return; + } + Cmp(rn, 0); + B(ne, label, is_far_target); +} + +void ArmVIXLMacroAssembler::B(vixl32::Label* label) { + if (!label->IsBound()) { + // Try to use 16-bit T2 encoding of B instruction. + DCHECK(OutsideITBlock()); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + b(al, Narrow, label); + AddBranchLabel(label); + return; + } + MacroAssembler::B(label); +} + +void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) { + if (!label->IsBound() && !is_far_target) { + // Try to use 16-bit T2 encoding of B instruction. + DCHECK(OutsideITBlock()); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + b(cond, Narrow, label); + AddBranchLabel(label); + return; + } + // To further reduce the Bcc encoding size and use 16-bit T1 encoding, + // we can provide a hint to this function: i.e. far_target=false. + // By default this function uses 'EncodingSizeType::Best' which generates 32-bit T3 encoding. + MacroAssembler::B(cond, label); +} + +} // namespace arm +} // namespace art diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h new file mode 100644 index 0000000000..e81e767575 --- /dev/null +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_VIXL_H_ +#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_VIXL_H_ + +#include "base/arena_containers.h" +#include "base/logging.h" +#include "constants_arm.h" +#include "offsets.h" +#include "utils/arm/assembler_arm_shared.h" +#include "utils/arm/managed_register_arm.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +// TODO(VIXL): Make VIXL compile with -Wshadow and remove pragmas. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch32/macro-assembler-aarch32.h" +#pragma GCC diagnostic pop + +namespace vixl32 = vixl::aarch32; + +namespace art { +namespace arm { + +class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { + public: + // Most methods fit in a 1KB code buffer, which results in more optimal alloc/realloc and + // fewer system calls than a larger default capacity. + static constexpr size_t kDefaultCodeBufferCapacity = 1 * KB; + + ArmVIXLMacroAssembler() + : vixl32::MacroAssembler(ArmVIXLMacroAssembler::kDefaultCodeBufferCapacity) {} + + // The following interfaces can generate CMP+Bcc or Cbz/Cbnz. + // CMP+Bcc are generated by default. + // If a hint is given (is_far_target = false) and rn and label can all fit into Cbz/Cbnz, + // then Cbz/Cbnz is generated. + // Prefer following interfaces to using vixl32::MacroAssembler::Cbz/Cbnz. + // In T32, Cbz/Cbnz instructions have following limitations: + // - Far targets, which are over 126 bytes away, are not supported. + // - Only low registers can be encoded. + // - Backward branches are not supported. + void CompareAndBranchIfZero(vixl32::Register rn, + vixl32::Label* label, + bool is_far_target = true); + void CompareAndBranchIfNonZero(vixl32::Register rn, + vixl32::Label* label, + bool is_far_target = true); + + // In T32 some of the instructions (add, mov, etc) outside an IT block + // have only 32-bit encodings. But there are 16-bit flag setting + // versions of these instructions (adds, movs, etc). In most of the + // cases in ART we don't care if the instructions keep flags or not; + // thus we can benefit from smaller code size. + // VIXL will never generate flag setting versions (for example, adds + // for Add macro instruction) unless vixl32::DontCare option is + // explicitly specified. That's why we introduce wrappers to use + // DontCare option by default. 
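// Illustrative aside, not part of the patch: the macro that follows generates wrappers
// of the shape sketched here. The three-operand form forwards to the flags-aware
// overload with a "don't care" marker, leaving the assembler free to pick a 16-bit
// encoding. FlagsUpdate and StubMasm are simplified stand-ins for the VIXL types,
// shown only to make the forwarding pattern concrete.
#include <cstdio>

enum FlagsUpdate { kDontCare, kSetFlags, kLeaveFlags };

struct StubMasm {
  void Add(FlagsUpdate flags, int rd, int rn, int imm) {
    std::printf("add%s r%d, r%d, #%d\n", flags == kSetFlags ? "s" : "", rd, rn, imm);
  }
};

struct WrappedMasm : StubMasm {
  using StubMasm::Add;  // Keep the explicit flags-aware overload visible.
  void Add(int rd, int rn, int imm) { StubMasm::Add(kDontCare, rd, rn, imm); }
};

int main() {
  WrappedMasm masm;
  masm.Add(0, 1, 4);             // Default: flags "don't care", encoding left to the assembler.
  masm.Add(kSetFlags, 0, 1, 4);  // Explicitly request the flag-setting form.
  return 0;
}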
+#define WITH_FLAGS_DONT_CARE_RD_RN_OP(func_name) \ + void (func_name)(vixl32::Register rd, vixl32::Register rn, const vixl32::Operand& operand) { \ + MacroAssembler::func_name(vixl32::DontCare, rd, rn, operand); \ + } \ + using MacroAssembler::func_name + + WITH_FLAGS_DONT_CARE_RD_RN_OP(Adc); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Sub); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Sbc); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Rsb); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Rsc); + + WITH_FLAGS_DONT_CARE_RD_RN_OP(Eor); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Orr); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Orn); + WITH_FLAGS_DONT_CARE_RD_RN_OP(And); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Bic); + + WITH_FLAGS_DONT_CARE_RD_RN_OP(Asr); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Lsr); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Lsl); + WITH_FLAGS_DONT_CARE_RD_RN_OP(Ror); + +#undef WITH_FLAGS_DONT_CARE_RD_RN_OP + +#define WITH_FLAGS_DONT_CARE_RD_OP(func_name) \ + void (func_name)(vixl32::Register rd, const vixl32::Operand& operand) { \ + MacroAssembler::func_name(vixl32::DontCare, rd, operand); \ + } \ + using MacroAssembler::func_name + + WITH_FLAGS_DONT_CARE_RD_OP(Mvn); + WITH_FLAGS_DONT_CARE_RD_OP(Mov); + +#undef WITH_FLAGS_DONT_CARE_RD_OP + + // The following two functions don't fall into above categories. Overload them separately. + void Rrx(vixl32::Register rd, vixl32::Register rn) { + MacroAssembler::Rrx(vixl32::DontCare, rd, rn); + } + using MacroAssembler::Rrx; + + void Mul(vixl32::Register rd, vixl32::Register rn, vixl32::Register rm) { + MacroAssembler::Mul(vixl32::DontCare, rd, rn, rm); + } + using MacroAssembler::Mul; + + // TODO: Remove when MacroAssembler::Add(FlagsUpdate, Condition, Register, Register, Operand) + // makes the right decision about 16-bit encodings. + void Add(vixl32::Register rd, vixl32::Register rn, const vixl32::Operand& operand) { + if (rd.Is(rn) && operand.IsPlainRegister()) { + MacroAssembler::Add(rd, rn, operand); + } else { + MacroAssembler::Add(vixl32::DontCare, rd, rn, operand); + } + } + using MacroAssembler::Add; + + // These interfaces try to use 16-bit T2 encoding of B instruction. + void B(vixl32::Label* label); + // For B(label), we always try to use Narrow encoding, because 16-bit T2 encoding supports + // jumping within 2KB range. For B(cond, label), because the supported branch range is 256 + // bytes; we use the far_target hint to try to use 16-bit T1 encoding for short range jumps. + void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true); + + // Use literal for generating double constant if it doesn't fit VMOV encoding. + void Vmov(vixl32::DRegister rd, double imm) { + if (vixl::VFP::IsImmFP64(imm)) { + MacroAssembler::Vmov(rd, imm); + } else { + MacroAssembler::Vldr(rd, imm); + } + } + using MacroAssembler::Vmov; +}; + +class ArmVIXLAssembler FINAL : public Assembler { + private: + class ArmException; + public: + explicit ArmVIXLAssembler(ArenaAllocator* arena) + : Assembler(arena) { + // Use Thumb2 instruction set. + vixl_masm_.UseT32(); + } + + virtual ~ArmVIXLAssembler() {} + ArmVIXLMacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } + void FinalizeCode() OVERRIDE; + + // Size of generated code. + size_t CodeSize() const OVERRIDE; + const uint8_t* CodeBufferBaseAddress() const OVERRIDE; + + // Copy instructions out of assembly buffer into the given region of memory. 
+ void FinalizeInstructions(const MemoryRegion& region) OVERRIDE; + + void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM"; + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM"; + } + + // + // Heap poisoning. + // + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(vixl32::Register reg); + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(vixl32::Register reg); + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(vixl32::Register reg); + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybeUnpoisonHeapReference(vixl32::Register reg); + + void StoreToOffset(StoreOperandType type, + vixl32::Register reg, + vixl32::Register base, + int32_t offset); + void StoreSToOffset(vixl32::SRegister source, vixl32::Register base, int32_t offset); + void StoreDToOffset(vixl32::DRegister source, vixl32::Register base, int32_t offset); + + void LoadImmediate(vixl32::Register dest, int32_t value); + void LoadFromOffset(LoadOperandType type, + vixl32::Register reg, + vixl32::Register base, + int32_t offset); + void LoadSFromOffset(vixl32::SRegister reg, vixl32::Register base, int32_t offset); + void LoadDFromOffset(vixl32::DRegister reg, vixl32::Register base, int32_t offset); + + void LoadRegisterList(RegList regs, size_t stack_offset); + void StoreRegisterList(RegList regs, size_t stack_offset); + + bool ShifterOperandCanAlwaysHold(uint32_t immediate); + bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc = kCcDontCare); + bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store); + int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, + vixl32::Register temp, + vixl32::Register base, + int32_t offset); + int32_t GetAllowedLoadOffsetBits(LoadOperandType type); + int32_t GetAllowedStoreOffsetBits(StoreOperandType type); + + void AddConstant(vixl32::Register rd, int32_t value); + void AddConstant(vixl32::Register rd, vixl32::Register rn, int32_t value); + void AddConstantInIt(vixl32::Register rd, + vixl32::Register rn, + int32_t value, + vixl32::Condition cond = vixl32::al); + + template <typename T> + vixl::aarch32::Literal<T>* CreateLiteralDestroyedWithPool(T value) { + vixl::aarch32::Literal<T>* literal = + new vixl::aarch32::Literal<T>(value, + vixl32::RawLiteral::kPlacedWhenUsed, + vixl32::RawLiteral::kDeletedOnPoolDestruction); + return literal; + } + + private: + // VIXL assembler. + ArmVIXLMacroAssembler vixl_masm_; +}; + +// Thread register declaration. 
+extern const vixl32::Register tr; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_VIXL_H_ diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 546dd653af..1e71d06b49 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1917,7 +1917,7 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kLongOrFPLiteral1KiB: return 4u; - case kLongOrFPLiteral256KiB: + case kLongOrFPLiteral64KiB: return 10u; case kLongOrFPLiteralFar: return 14u; @@ -1989,7 +1989,7 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con break; case kLiteral1MiB: case kLiteral64KiB: - case kLongOrFPLiteral256KiB: + case kLongOrFPLiteral64KiB: case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. diff -= 4; // One extra 32-bit MOV. @@ -2018,6 +2018,45 @@ inline size_t Thumb2Assembler::Fixup::IncreaseSize(Size new_size) { return adjustment; } +bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const { + DCHECK(size_ == original_size_); + if (target_ == kUnresolved) { + return false; + } + // GetOffset() does not depend on current_code_size for branches, only for literals. + constexpr uint32_t current_code_size = 0u; + switch (GetSize()) { + case kBranch16Bit: + return IsInt(cond_ != AL ? 9 : 12, GetOffset(current_code_size)); + case kBranch32Bit: + // We don't support conditional branches beyond +-1MiB + // or unconditional branches beyond +-16MiB. + return true; + + case kCbxz16Bit: + return IsUint<7>(GetOffset(current_code_size)); + case kCbxz32Bit: + return IsInt<9>(GetOffset(current_code_size)); + case kCbxz48Bit: + // We don't support conditional branches beyond +-1MiB. + return true; + + case kLiteral1KiB: + case kLiteral4KiB: + case kLiteral64KiB: + case kLiteral1MiB: + case kLiteralFar: + case kLiteralAddr1KiB: + case kLiteralAddr4KiB: + case kLiteralAddr64KiB: + case kLiteralAddrFar: + case kLongOrFPLiteral1KiB: + case kLongOrFPLiteral64KiB: + case kLongOrFPLiteralFar: + return false; + } +} + uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) { uint32_t old_code_size = current_code_size; switch (GetSize()) { @@ -2105,10 +2144,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) if (IsUint<10>(GetOffset(current_code_size))) { break; } - current_code_size += IncreaseSize(kLongOrFPLiteral256KiB); + current_code_size += IncreaseSize(kLongOrFPLiteral64KiB); FALLTHROUGH_INTENDED; - case kLongOrFPLiteral256KiB: - if (IsUint<18>(GetOffset(current_code_size))) { + case kLongOrFPLiteral64KiB: + if (IsUint<16>(GetOffset(current_code_size))) { break; } current_code_size += IncreaseSize(kLongOrFPLiteralFar); @@ -2269,11 +2308,10 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } - case kLongOrFPLiteral256KiB: { - int32_t offset = GetOffset(code_size); - int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff); + case kLongOrFPLiteral64KiB: { + int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); - int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_. + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. 
buffer->Store<int16_t>(location_, mov_encoding >> 16); buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); @@ -2326,7 +2364,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } Register rn = ad.GetRegister(); - if (IsHighRegister(rn) && rn != SP && rn != PC) { + if (IsHighRegister(rn) && (byte || half || (rn != SP && rn != PC))) { must_be_32bit = true; } @@ -2338,24 +2376,24 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, // Immediate offset int32_t offset = ad.GetOffset(); - // The 16 bit SP relative instruction can only have a 10 bit offset. - if (rn == SP && offset >= (1 << 10)) { - must_be_32bit = true; - } - if (byte) { // 5 bit offset, no shift. - if (offset >= (1 << 5)) { + if ((offset & ~0x1f) != 0) { must_be_32bit = true; } } else if (half) { - // 6 bit offset, shifted by 1. - if (offset >= (1 << 6)) { + // 5 bit offset, shifted by 1. + if ((offset & ~(0x1f << 1)) != 0) { + must_be_32bit = true; + } + } else if (rn == SP || rn == PC) { + // The 16 bit SP/PC relative instruction can only have an (imm8 << 2) offset. + if ((offset & ~(0xff << 2)) != 0) { must_be_32bit = true; } } else { - // 7 bit offset, shifted by 2. - if (offset >= (1 << 7)) { + // 5 bit offset, shifted by 2. + if ((offset & ~(0x1f << 2)) != 0) { must_be_32bit = true; } } @@ -2371,7 +2409,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else { // 16 bit thumb1. uint8_t opA = 0; - bool sp_relative = false; + bool sp_or_pc_relative = false; if (byte) { opA = 7U /* 0b0111 */; @@ -2380,7 +2418,10 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } else { if (rn == SP) { opA = 9U /* 0b1001 */; - sp_relative = true; + sp_or_pc_relative = true; + } else if (rn == PC) { + opA = 4U; + sp_or_pc_relative = true; } else { opA = 6U /* 0b0110 */; } @@ -2389,7 +2430,7 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, (load ? B11 : 0); CHECK_GE(offset, 0); - if (sp_relative) { + if (sp_or_pc_relative) { // SP relative, 10 bit offset. CHECK_LT(offset, (1 << 10)); CHECK_ALIGNED(offset, 4); @@ -2420,55 +2461,36 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } } else { // Register shift. - if (ad.GetRegister() == PC) { - // PC relative literal encoding. - int32_t offset = ad.GetOffset(); - if (must_be_32bit || offset < 0 || offset >= (1 << 10) || !load) { - int32_t up = B23; - if (offset < 0) { - offset = -offset; - up = 0; - } - CHECK_LT(offset, (1 << 12)); - int32_t encoding = 0x1f << 27 | 0xf << 16 | B22 | (load ? B20 : 0) | - offset | up | - static_cast<uint32_t>(rd) << 12; - Emit32(encoding); - } else { - // 16 bit literal load. - CHECK_GE(offset, 0); - CHECK_LT(offset, (1 << 10)); - int32_t encoding = B14 | (load ? B11 : 0) | static_cast<uint32_t>(rd) << 8 | offset >> 2; - Emit16(encoding); + CHECK_NE(ad.GetRegister(), PC); + if (ad.GetShiftCount() != 0) { + // If there is a shift count this must be 32 bit. + must_be_32bit = true; + } else if (IsHighRegister(ad.GetRegisterOffset())) { + must_be_32bit = true; + } + + if (must_be_32bit) { + int32_t encoding = 0x1f << 27 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 | + ad.encodingThumb(true); + if (half) { + encoding |= B21; + } else if (!byte) { + encoding |= B22; } - } else { - if (ad.GetShiftCount() != 0) { - // If there is a shift count this must be 32 bit. 
- must_be_32bit = true; - } else if (IsHighRegister(ad.GetRegisterOffset())) { - must_be_32bit = true; + if (load && is_signed && (byte || half)) { + encoding |= B24; } - - if (must_be_32bit) { - int32_t encoding = 0x1f << 27 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 | - ad.encodingThumb(true); - if (half) { - encoding |= B21; - } else if (!byte) { - encoding |= B22; - } - Emit32(encoding); - } else { - // 16 bit register offset. - int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) | - ad.encodingThumb(false); - if (byte) { - encoding |= B10; - } else if (half) { - encoding |= B9; - } - Emit16(encoding); + Emit32(encoding); + } else { + // 16 bit register offset. + int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) | + ad.encodingThumb(false); + if (byte) { + encoding |= B10; + } else if (half) { + encoding |= B9; } + Emit16(encoding); } } } @@ -2808,7 +2830,7 @@ void Thumb2Assembler::strexd(Register rd, Register rt, Register rt2, Register rn void Thumb2Assembler::clrex(Condition cond) { CheckCondition(cond); - int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 | + int32_t encoding = B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20 | 0xf << 16 | B15 | @@ -3015,9 +3037,49 @@ void Thumb2Assembler::vpopd(DRegister reg, int nregs, Condition cond) { } +void Thumb2Assembler::vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond) { + int32_t rest = B23; + EmitVLdmOrStm(rest, + static_cast<uint32_t>(reg), + nregs, + base_reg, + /*is_load*/ true, + /*dbl*/ true, + cond); +} + + +void Thumb2Assembler::vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond) { + int32_t rest = B23; + EmitVLdmOrStm(rest, + static_cast<uint32_t>(reg), + nregs, + base_reg, + /*is_load*/ false, + /*dbl*/ true, + cond); +} + + void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) { + int32_t rest = B21 | (push ? B24 : B23); + EmitVLdmOrStm(rest, reg, nregs, SP, /*is_load*/ !push, dbl, cond); +} + + +void Thumb2Assembler::EmitVLdmOrStm(int32_t rest, + uint32_t reg, + int nregs, + Register rn, + bool is_load, + bool dbl, + Condition cond) { CheckCondition(cond); + DCHECK_GT(nregs, 0); + DCHECK_LE(reg + nregs, 32u); + DCHECK(!dbl || (nregs <= 16)); + uint32_t D; uint32_t Vd; if (dbl) { @@ -3029,14 +3091,17 @@ void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, D = reg & 1; Vd = (reg >> 1) & 15U /* 0b1111 */; } - int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 | - B11 | B9 | - (dbl ? B8 : 0) | - (push ? B24 : (B23 | B20)) | - 14U /* 0b1110 */ << 28 | - nregs << (dbl ? 1 : 0) | - D << 22 | - Vd << 12; + + int32_t encoding = rest | + 14U /* 0b1110 */ << 28 | + B27 | B26 | B11 | B9 | + (is_load ? B20 : 0) | + static_cast<int16_t>(rn) << 16 | + D << 22 | + Vd << 12 | + (dbl ? B8 : 0) | + nregs << (dbl ? 1 : 0); + Emit32(encoding); } @@ -3117,6 +3182,30 @@ void Thumb2Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR. 
Emit32(encoding); } +void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) { + uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B10 | B8) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit32(encoding); +} + +void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) { + CHECK(size == 8 || size == 16 || size == 32) << size; + uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B9) | + (is_unsigned ? B7 : 0) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit32(encoding); +} void Thumb2Assembler::svc(uint32_t imm8) { CHECK(IsUint<8>(imm8)) << imm8; @@ -3261,6 +3350,30 @@ void Thumb2Assembler::PopList(RegList regs, Condition cond) { ldm(IA_W, SP, regs, cond); } +void Thumb2Assembler::StoreList(RegList regs, size_t stack_offset) { + DCHECK_NE(regs, 0u); + DCHECK_EQ(regs & (1u << IP), 0u); + if (IsPowerOfTwo(regs)) { + Register reg = static_cast<Register>(CTZ(static_cast<uint32_t>(regs))); + str(reg, Address(SP, stack_offset)); + } else { + add(IP, SP, ShifterOperand(stack_offset)); + stm(IA, IP, regs); + } +} + +void Thumb2Assembler::LoadList(RegList regs, size_t stack_offset) { + DCHECK_NE(regs, 0u); + DCHECK_EQ(regs & (1u << IP), 0u); + if (IsPowerOfTwo(regs)) { + Register reg = static_cast<Register>(CTZ(static_cast<uint32_t>(regs))); + ldr(reg, Address(SP, stack_offset)); + } else { + Register lowest_reg = static_cast<Register>(CTZ(static_cast<uint32_t>(regs))); + add(lowest_reg, SP, ShifterOperand(stack_offset)); + ldm(IA, lowest_reg, regs); + } +} void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) { if (cond != AL || rd != rm) { @@ -3271,6 +3384,30 @@ void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) { void Thumb2Assembler::Bind(Label* label) { BindLabel(label, buffer_.Size()); + + // Try to emit some Fixups now to reduce the memory needed during the branch fixup later. + while (!fixups_.empty() && fixups_.back().IsCandidateForEmitEarly()) { + const Fixup& last_fixup = fixups_.back(); + // Fixups are ordered by location, so the candidate can surely be emitted if it is + // a forward branch. If it's a backward branch, it may go over any number of other + // fixups. We could check for any number of emit early candidates but we want this + // heuristics to be quick, so check just one. 
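// Illustrative aside, not part of the patch: a simplified model of the early-emission
// loop in Bind() that the comment above describes. Once a label is bound, trailing
// fixups whose targets are known and in range are written out immediately, unless a
// backward branch would cross an earlier fixup that is still pending (this sketch
// checks only the immediately preceding fixup and otherwise bails, which is cruder
// than the real heuristic). Fixup and InRange() are stand-ins, not the real
// Thumb2Assembler types.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Fixup {
  static constexpr uint32_t kUnresolved = ~0u;
  uint32_t location;
  uint32_t target;  // kUnresolved until the label is bound.
  bool InRange() const {
    if (target == kUnresolved) return false;
    int64_t diff = static_cast<int64_t>(target) - static_cast<int64_t>(location);
    return diff >= -2048 && diff < 2048;  // Stand-in for the encoding-specific range check.
  }
};

static void EmitEarly(std::vector<Fixup>* fixups) {
  while (!fixups->empty() && fixups->back().InRange()) {
    const Fixup& last = fixups->back();
    if (last.target < last.location && fixups->size() >= 2u &&
        (*fixups)[fixups->size() - 2u].location >= last.target) {
      break;  // Backward branch over a still-pending fixup: keep it for later.
    }
    std::printf("emit fixup at %u -> %u\n", last.location, last.target);
    fixups->pop_back();
  }
}

int main() {
  std::vector<Fixup> fixups = {{8, Fixup::kUnresolved}, {16, 40}, {32, 20}};
  EmitEarly(&fixups);  // Emits the fixups at 32 and 16; the unresolved one at 8 stays pending.
  return 0;
}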
+ uint32_t target = last_fixup.GetTarget(); + if (target < last_fixup.GetLocation() && + fixups_.size() >= 2u && + fixups_[fixups_.size() - 2u].GetLocation() >= target) { + const Fixup& prev_fixup = fixups_[fixups_.size() - 2u]; + if (!prev_fixup.IsCandidateForEmitEarly()) { + break; + } + uint32_t min_target = std::min(target, prev_fixup.GetTarget()); + if (fixups_.size() >= 3u && fixups_[fixups_.size() - 3u].GetLocation() >= min_target) { + break; + } + } + last_fixup.Emit(&buffer_, buffer_.Size()); + fixups_.pop_back(); + } } @@ -3574,6 +3711,24 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) { + if (!vmovd(dd, value, cond)) { + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (int_value == bit_cast<uint64_t, double>(0.0)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `dd` and then subtracting it. + bool success = vmovd(dd, 2.0, cond); + CHECK(success); + vsubd(dd, dd, dd, cond); + } else { + Literal* literal = literal64_dedupe_map_.GetOrCreate( + int_value, + [this, int_value]() { return NewLiteral<uint64_t>(int_value); }); + LoadLiteral(dd, literal); + } + } +} + int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { switch (type) { case kLoadSignedByte: @@ -3816,12 +3971,6 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, } -void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) { - CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); - dmb(SY); -} - - void Thumb2Assembler::dmb(DmbOptions flavor) { int32_t encoding = 0xf3bf8f50; // dmb in T1 encoding. Emit32(encoding | flavor); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index ce310a4da8..1c495aa7a7 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -22,11 +22,11 @@ #include <vector> #include "base/arena_containers.h" +#include "base/array_ref.h" #include "base/logging.h" #include "constants_arm.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm/assembler_arm.h" -#include "utils/array_ref.h" #include "offsets.h" namespace art { @@ -43,6 +43,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { fixups_(arena->Adapter(kArenaAllocAssembler)), fixup_dependents_(arena->Adapter(kArenaAllocAssembler)), literals_(arena->Adapter(kArenaAllocAssembler)), + literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)), jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0u), last_old_position_(0u), @@ -250,10 +251,15 @@ class Thumb2Assembler FINAL : public ArmAssembler { void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE; void vmstat(Condition cond = AL) OVERRIDE; // VMRS APSR_nzcv, FPSCR + void vcntd(DRegister dd, DRegister dm) OVERRIDE; + void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE; + void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; // Branch instructions. 
void b(Label* label, Condition cond = AL); @@ -287,6 +293,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void PushList(RegList regs, Condition cond = AL) OVERRIDE; void PopList(RegList regs, Condition cond = AL) OVERRIDE; + void StoreList(RegList regs, size_t stack_offset) OVERRIDE; + void LoadList(RegList regs, size_t stack_offset) OVERRIDE; void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; @@ -316,6 +324,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load and Store. May clobber IP. void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; + void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; void LoadFromOffset(LoadOperandType type, Register reg, @@ -363,8 +372,6 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Emit16(int16_t value); // Emit a 16 bit instruction in little endian format. void Bind(Label* label) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - // Force the assembler to generate 32 bit instructions. void Force32Bit() { force_32bit_ = true; @@ -461,8 +468,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load long or FP literal variants. // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. kLongOrFPLiteral1KiB, - // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes. - kLongOrFPLiteral256KiB, + // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes. + kLongOrFPLiteral64KiB, // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes. kLongOrFPLiteralFar, }; @@ -497,7 +504,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load wide literal. static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); return Fixup(rt, rt2, kNoSRegister, kNoDRegister, @@ -507,7 +514,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load FP single literal. static Fixup LoadSingleLiteral(uint32_t location, SRegister sd, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister, AL, kLoadFPLiteralSingle, size, location); @@ -516,7 +523,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Load FP double literal. static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd, Size size = kLongOrFPLiteral1KiB) { - DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB || size == kLongOrFPLiteralFar); return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd, AL, kLoadFPLiteralDouble, size, location); @@ -568,6 +575,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { return location_; } + uint32_t GetTarget() const { + return target_; + } + uint32_t GetAdjustment() const { return adjustment_; } @@ -587,6 +598,11 @@ class Thumb2Assembler FINAL : public ArmAssembler { target_ = target; } + // Branches with bound targets that are in range can be emitted early. 
+ // However, the caller still needs to check if the branch doesn't go over + // another Fixup that's not ready to be emitted. + bool IsCandidateForEmitEarly() const; + // Check if the current size is OK for current location_, target_ and adjustment_. // If not, increase the size. Return the size increase, 0 if unchanged. // If the target if after this Fixup, also add the difference to adjustment_, @@ -745,6 +761,14 @@ class Thumb2Assembler FINAL : public ArmAssembler { SRegister sn, SRegister sm); + void EmitVLdmOrStm(int32_t rest, + uint32_t reg, + int nregs, + Register rn, + bool is_load, + bool dbl, + Condition cond); + void EmitVFPddd(Condition cond, int32_t opcode, DRegister dd, @@ -867,6 +891,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { // without invalidating pointers and references to existing elements. ArenaDeque<Literal> literals_; + // Deduplication map for 64-bit literals, used for LoadDImmediate(). + ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_; + // Jump table list. ArenaDeque<JumpTable> jump_tables_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index b5cafcbf66..0147a76744 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -16,12 +16,15 @@ #include "assembler_thumb2.h" +#include "android-base/stringprintf.h" + #include "base/stl_util.h" -#include "base/stringprintf.h" #include "utils/assembler_test.h" namespace art { +using android::base::StringPrintf; + class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, arm::Register, arm::SRegister, uint32_t> { @@ -207,6 +210,13 @@ TEST_F(AssemblerThumb2Test, strexd) { DriverStr(expected, "strexd"); } +TEST_F(AssemblerThumb2Test, clrex) { + __ clrex(); + + const char* expected = "clrex\n"; + DriverStr(expected, "clrex"); +} + TEST_F(AssemblerThumb2Test, LdrdStrd) { __ ldrd(arm::R0, arm::Address(arm::R2, 8)); __ ldrd(arm::R0, arm::Address(arm::R12)); @@ -279,6 +289,148 @@ TEST_F(AssemblerThumb2Test, smull) { DriverStr(expected, "smull"); } +TEST_F(AssemblerThumb2Test, LoadByteFromThumbOffset) { + arm::LoadOperandType type = arm::kLoadUnsignedByte; + + __ LoadFromOffset(type, arm::R0, arm::R7, 0); + __ LoadFromOffset(type, arm::R1, arm::R7, 31); + __ LoadFromOffset(type, arm::R2, arm::R7, 32); + __ LoadFromOffset(type, arm::R3, arm::R7, 4095); + __ LoadFromOffset(type, arm::R4, arm::SP, 0); + + const char* expected = + "ldrb r0, [r7, #0]\n" + "ldrb r1, [r7, #31]\n" + "ldrb.w r2, [r7, #32]\n" + "ldrb.w r3, [r7, #4095]\n" + "ldrb.w r4, [sp, #0]\n"; + DriverStr(expected, "LoadByteFromThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreByteToThumbOffset) { + arm::StoreOperandType type = arm::kStoreByte; + + __ StoreToOffset(type, arm::R0, arm::R7, 0); + __ StoreToOffset(type, arm::R1, arm::R7, 31); + __ StoreToOffset(type, arm::R2, arm::R7, 32); + __ StoreToOffset(type, arm::R3, arm::R7, 4095); + __ StoreToOffset(type, arm::R4, arm::SP, 0); + + const char* expected = + "strb r0, [r7, #0]\n" + "strb r1, [r7, #31]\n" + "strb.w r2, [r7, #32]\n" + "strb.w r3, [r7, #4095]\n" + "strb.w r4, [sp, #0]\n"; + DriverStr(expected, "StoreByteToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, LoadHalfFromThumbOffset) { + arm::LoadOperandType type = arm::kLoadUnsignedHalfword; + + __ LoadFromOffset(type, arm::R0, arm::R7, 0); + __ LoadFromOffset(type, arm::R1, arm::R7, 62); + __ LoadFromOffset(type, arm::R2, arm::R7, 64); + __ LoadFromOffset(type, arm::R3, arm::R7, 4094); + __ LoadFromOffset(type, 
arm::R4, arm::SP, 0); + __ LoadFromOffset(type, arm::R5, arm::R7, 1); // Unaligned + + const char* expected = + "ldrh r0, [r7, #0]\n" + "ldrh r1, [r7, #62]\n" + "ldrh.w r2, [r7, #64]\n" + "ldrh.w r3, [r7, #4094]\n" + "ldrh.w r4, [sp, #0]\n" + "ldrh.w r5, [r7, #1]\n"; + DriverStr(expected, "LoadHalfFromThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreHalfToThumbOffset) { + arm::StoreOperandType type = arm::kStoreHalfword; + + __ StoreToOffset(type, arm::R0, arm::R7, 0); + __ StoreToOffset(type, arm::R1, arm::R7, 62); + __ StoreToOffset(type, arm::R2, arm::R7, 64); + __ StoreToOffset(type, arm::R3, arm::R7, 4094); + __ StoreToOffset(type, arm::R4, arm::SP, 0); + __ StoreToOffset(type, arm::R5, arm::R7, 1); // Unaligned + + const char* expected = + "strh r0, [r7, #0]\n" + "strh r1, [r7, #62]\n" + "strh.w r2, [r7, #64]\n" + "strh.w r3, [r7, #4094]\n" + "strh.w r4, [sp, #0]\n" + "strh.w r5, [r7, #1]\n"; + DriverStr(expected, "StoreHalfToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, LoadWordFromSpPlusOffset) { + arm::LoadOperandType type = arm::kLoadWord; + + __ LoadFromOffset(type, arm::R0, arm::SP, 0); + __ LoadFromOffset(type, arm::R1, arm::SP, 124); + __ LoadFromOffset(type, arm::R2, arm::SP, 128); + __ LoadFromOffset(type, arm::R3, arm::SP, 1020); + __ LoadFromOffset(type, arm::R4, arm::SP, 1024); + __ LoadFromOffset(type, arm::R5, arm::SP, 4092); + __ LoadFromOffset(type, arm::R6, arm::SP, 1); // Unaligned + + const char* expected = + "ldr r0, [sp, #0]\n" + "ldr r1, [sp, #124]\n" + "ldr r2, [sp, #128]\n" + "ldr r3, [sp, #1020]\n" + "ldr.w r4, [sp, #1024]\n" + "ldr.w r5, [sp, #4092]\n" + "ldr.w r6, [sp, #1]\n"; + DriverStr(expected, "LoadWordFromSpPlusOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordToSpPlusOffset) { + arm::StoreOperandType type = arm::kStoreWord; + + __ StoreToOffset(type, arm::R0, arm::SP, 0); + __ StoreToOffset(type, arm::R1, arm::SP, 124); + __ StoreToOffset(type, arm::R2, arm::SP, 128); + __ StoreToOffset(type, arm::R3, arm::SP, 1020); + __ StoreToOffset(type, arm::R4, arm::SP, 1024); + __ StoreToOffset(type, arm::R5, arm::SP, 4092); + __ StoreToOffset(type, arm::R6, arm::SP, 1); // Unaligned + + const char* expected = + "str r0, [sp, #0]\n" + "str r1, [sp, #124]\n" + "str r2, [sp, #128]\n" + "str r3, [sp, #1020]\n" + "str.w r4, [sp, #1024]\n" + "str.w r5, [sp, #4092]\n" + "str.w r6, [sp, #1]\n"; + DriverStr(expected, "StoreWordToSpPlusOffset"); +} + +TEST_F(AssemblerThumb2Test, LoadWordFromPcPlusOffset) { + arm::LoadOperandType type = arm::kLoadWord; + + __ LoadFromOffset(type, arm::R0, arm::PC, 0); + __ LoadFromOffset(type, arm::R1, arm::PC, 124); + __ LoadFromOffset(type, arm::R2, arm::PC, 128); + __ LoadFromOffset(type, arm::R3, arm::PC, 1020); + __ LoadFromOffset(type, arm::R4, arm::PC, 1024); + __ LoadFromOffset(type, arm::R5, arm::PC, 4092); + __ LoadFromOffset(type, arm::R6, arm::PC, 1); // Unaligned + + const char* expected = + "ldr r0, [pc, #0]\n" + "ldr r1, [pc, #124]\n" + "ldr r2, [pc, #128]\n" + "ldr r3, [pc, #1020]\n" + "ldr.w r4, [pc, #1024]\n" + "ldr.w r5, [pc, #4092]\n" + "ldr.w r6, [pc, #1]\n"; + DriverStr(expected, "LoadWordFromPcPlusOffset"); +} + TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) { arm::StoreOperandType type = arm::kStoreWord; int32_t offset = 4092; @@ -869,10 +1021,11 @@ TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { } std::string expected = - "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. 
+ "movw ip, #(0x408 - 0x4 - 4)\n" "1:\n" "add ip, pc\n" - "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + "ldrd r1, r3, [ip, #0]\n" + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + ".align 2, 0\n" "2:\n" @@ -884,48 +1037,78 @@ TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { __ GetAdjustedPosition(label.Position())); } -TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) { +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) { // The literal size must match but the type doesn't, so use an int32_t rather than float. arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); __ LoadLiteral(arm::S3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 3u; + constexpr size_t kLdrR0R0Count = (1 << 15) - 3u; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = - "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x10004 - 0x4 - 4)\n" "1:\n" "add ip, pc\n" - "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + "vldr s3, [ip, #0]\n" + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + ".align 2, 0\n" "2:\n" ".word 0x12345678\n"; - DriverStr(expected, "LoadLiteralSingleMax256KiB"); + DriverStr(expected, "LoadLiteralSingleMax64KiB"); EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, __ GetAdjustedPosition(label.Position())); } -TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) { + // The literal size must match but the type doesn't, so use an int32_t rather than float. + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ ldr(arm::R0, arm::Address(arm::R0)); + __ LoadLiteral(arm::S3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 15) - 4u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "ldr r0, [r0]\n" + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x10004 - 0x6 - 4)\n" + "1:\n" + "add ip, pc\n" + "vldr s3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) { // The literal size must match but the type doesn't, so use an int64_t rather than double. arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); __ LoadLiteral(arm::D3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 2u; + constexpr size_t kLdrR0R0Count = (1 << 15) - 2u; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. - "movw ip, #(0x40000 & 0xffff)\n" + "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n" // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
- "movt ip, #(0x40000 >> 16)\n" + "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n" "1:\n" "add ip, pc\n" "vldr d3, [ip, #0]\n" + @@ -934,7 +1117,7 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { "2:\n" ".word 0x87654321\n" ".word 0x12345678\n"; - DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB"); + DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB"); EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, __ GetAdjustedPosition(label.Position())); @@ -946,16 +1129,16 @@ TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { __ LoadLiteral(arm::D3, literal); Label label; __ Bind(&label); - constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234; + constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234; for (size_t i = 0; i != kLdrR0R0Count; ++i) { __ ldr(arm::R0, arm::Address(arm::R0)); } std::string expected = // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. - "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n" + "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n" // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. - "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n" + "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n" "1:\n" "add ip, pc\n" "vldr d3, [ip, #0]\n" + @@ -1380,4 +1563,104 @@ TEST_F(AssemblerThumb2Test, revsh) { DriverStr(expected, "revsh"); } +TEST_F(AssemblerThumb2Test, vcnt) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + __ vcntd(arm::D0, arm::D1); + __ vcntd(arm::D19, arm::D20); + __ vcntd(arm::D0, arm::D9); + __ vcntd(arm::D16, arm::D20); + + std::string expected = + "vcnt.8 d0, d1\n" + "vcnt.8 d19, d20\n" + "vcnt.8 d0, d9\n" + "vcnt.8 d16, d20\n"; + + DriverStr(expected, "vcnt"); +} + +TEST_F(AssemblerThumb2Test, vpaddl) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + // Different data types (signed and unsigned) are also tested. + __ vpaddld(arm::D0, arm::D0, 8, true); + __ vpaddld(arm::D20, arm::D20, 8, false); + __ vpaddld(arm::D0, arm::D20, 16, false); + __ vpaddld(arm::D20, arm::D0, 32, true); + + std::string expected = + "vpaddl.u8 d0, d0\n" + "vpaddl.s8 d20, d20\n" + "vpaddl.s16 d0, d20\n" + "vpaddl.u32 d20, d0\n"; + + DriverStr(expected, "vpaddl"); +} + +TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) { + arm::Address mem_address(arm::R0, arm::R1, arm::Shift::LSL, 2); + + __ ldrsb(arm::R2, mem_address); + __ ldrb(arm::R2, mem_address); + __ ldrsh(arm::R2, mem_address); + __ ldrh(arm::R2, mem_address); + __ ldr(arm::R2, mem_address); + + std::string expected = + "ldrsb r2, [r0, r1, LSL #2]\n" + "ldrb r2, [r0, r1, LSL #2]\n" + "ldrsh r2, [r0, r1, LSL #2]\n" + "ldrh r2, [r0, r1, LSL #2]\n" + "ldr r2, [r0, r1, LSL #2]\n"; + + DriverStr(expected, "LoadFromShiftedRegOffset"); +} + +TEST_F(AssemblerThumb2Test, VStmLdmPushPop) { + // Different D register numbers are used here, to test register encoding. 
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Various register list lengths, for both S and D registers, are also tested.
+  __ vstmiad(arm::R0, arm::D0, 4);
+  __ vldmiad(arm::R1, arm::D9, 5);
+  __ vpopd(arm::D0, 4);
+  __ vpushd(arm::D9, 5);
+  __ vpops(arm::S0, 4);
+  __ vpushs(arm::S9, 5);
+  __ vpushs(arm::S16, 5);
+  __ vpushd(arm::D0, 16);
+  __ vpushd(arm::D1, 15);
+  __ vpushd(arm::D8, 16);
+  __ vpushd(arm::D31, 1);
+  __ vpushs(arm::S0, 32);
+  __ vpushs(arm::S1, 31);
+  __ vpushs(arm::S16, 16);
+  __ vpushs(arm::S31, 1);
+
+  std::string expected =
+      "vstmia r0, {d0 - d3}\n"
+      "vldmia r1, {d9 - d13}\n"
+      "vpop {d0 - d3}\n"
+      "vpush {d9 - d13}\n"
+      "vpop {s0 - s3}\n"
+      "vpush {s9 - s13}\n"
+      "vpush {s16 - s20}\n"
+      "vpush {d0 - d15}\n"
+      "vpush {d1 - d15}\n"
+      "vpush {d8 - d23}\n"
+      "vpush {d31}\n"
+      "vpush {s0 - s31}\n"
+      "vpush {s1 - s31}\n"
+      "vpush {s16 - s31}\n"
+      "vpush {s31}\n";
+
+  DriverStr(expected, "VStmLdmPushPop");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
new file mode 100644
index 0000000000..3f425dfaf5
--- /dev/null
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -0,0 +1,659 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "jni_macro_assembler_arm.h" + +#include <algorithm> + +#include "assembler_thumb2.h" +#include "base/arena_allocator.h" +#include "base/bit_utils.h" +#include "base/logging.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "offsets.h" +#include "thread.h" + +namespace art { +namespace arm { + +constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize); + +// Slowpath entered when Thread::Current()->_exception is non-null +class ArmExceptionSlowPath FINAL : public SlowPath { + public: + ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) { + } + void Emit(Assembler *sp_asm) OVERRIDE; + private: + const ArmManagedRegister scratch_; + const size_t stack_adjust_; +}; + +ArmJNIMacroAssembler::ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa) { + switch (isa) { + case kArm: + case kThumb2: + asm_.reset(new (arena) Thumb2Assembler(arena)); + break; + + default: + LOG(FATAL) << isa; + UNREACHABLE(); + } +} + +ArmJNIMacroAssembler::~ArmJNIMacroAssembler() { +} + +size_t ArmJNIMacroAssembler::CodeSize() const { + return asm_->CodeSize(); +} + +DebugFrameOpCodeWriterForAssembler& ArmJNIMacroAssembler::cfi() { + return asm_->cfi(); +} + +void ArmJNIMacroAssembler::FinalizeCode() { + asm_->FinalizeCode(); +} + +void ArmJNIMacroAssembler::FinalizeInstructions(const MemoryRegion& region) { + asm_->FinalizeInstructions(region); +} + +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + +#define __ asm_-> + +void ArmJNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + CHECK_EQ(CodeSize(), 0U); // Nothing emitted yet + CHECK_ALIGNED(frame_size, kStackAlignment); + CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); + + // Push callee saves and link register. + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + __ PushList(core_spill_mask); + cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + __ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); + } + + // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. + + // Write out Method*. + __ StoreToOffset(kStoreWord, R0, SP, 0); + + // Write out entry spills. + int32_t offset = frame_size + kFramePointerSize; + for (size_t i = 0; i < entry_spills.size(); ++i) { + ArmManagedRegister reg = entry_spills.at(i).AsArm(); + if (reg.IsNoRegister()) { + // only increment stack offset. 
+ ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsCoreRegister()) { + __ StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset); + offset += 4; + } else if (reg.IsSRegister()) { + __ StoreSToOffset(reg.AsSRegister(), SP, offset); + offset += 4; + } else if (reg.IsDRegister()) { + __ StoreDToOffset(reg.AsDRegister(), SP, offset); + offset += 8; + } + } +} + +void ArmJNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + cfi().RememberState(); + + // Compute callee saves to pop and PC. + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + + // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + CHECK_GT(frame_size, pop_values * kFramePointerSize); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. + + if (fp_spill_mask != 0) { + __ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); + cfi().RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); + } + + // Pop callee saves and PC. + __ PopList(core_spill_mask); + + // The CFI should be restored for any code that follows the exit block. + cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +void ArmJNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + __ AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); +} + +static void DecreaseFrameSizeImpl(ArmAssembler* assembler, size_t adjust) { + assembler->AddConstant(SP, adjust); + assembler->cfi().AdjustCFAOffset(-adjust); +} + +void ArmJNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + DecreaseFrameSizeImpl(asm_.get(), adjust); +} + +void ArmJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { + ArmManagedRegister src = msrc.AsArm(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsCoreRegister()) { + CHECK_EQ(4u, size); + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); + } else if (src.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value()); + __ StoreToOffset(kStoreWord, src.AsRegisterPairHigh(), SP, dest.Int32Value() + 4); + } else if (src.IsSRegister()) { + __ StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value()); + } else { + CHECK(src.IsDRegister()) << src; + __ StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value()); + } +} + +void ArmJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + __ StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreSpanning(FrameOffset dest, + ManagedRegister msrc, + FrameOffset in_off, + ManagedRegister mscratch) { + ArmManagedRegister src = msrc.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + __ StoreToOffset(kStoreWord, 
src.AsCoreRegister(), SP, dest.Int32Value()); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, in_off.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + sizeof(uint32_t)); +} + +void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, + ManagedRegister mbase, + MemberOffset offs, + bool unpoison_reference) { + ArmManagedRegister base = mbase.AsArm(); + ArmManagedRegister dst = mdest.AsArm(); + CHECK(base.IsCoreRegister()) << base; + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, + dst.AsCoreRegister(), + base.AsCoreRegister(), + offs.Int32Value()); + if (unpoison_reference) { + __ MaybeUnpoisonHeapReference(dst.AsCoreRegister()); + } +} + +void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { + ArmManagedRegister dst = mdest.AsArm(); + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, + ManagedRegister mbase, + Offset offs) { + ArmManagedRegister base = mbase.AsArm(); + ArmManagedRegister dst = mdest.AsArm(); + CHECK(base.IsCoreRegister()) << base; + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, + dst.AsCoreRegister(), + base.AsCoreRegister(), + offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, + uint32_t imm, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadImmediate(scratch.AsCoreRegister(), imm); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); +} + +static void EmitLoad(ArmAssembler* assembler, + ManagedRegister m_dst, + Register src_register, + int32_t src_offset, + size_t size) { + ArmManagedRegister dst = m_dst.AsArm(); + if (dst.IsNoRegister()) { + CHECK_EQ(0u, size) << dst; + } else if (dst.IsCoreRegister()) { + CHECK_EQ(4u, size) << dst; + assembler->LoadFromOffset(kLoadWord, dst.AsCoreRegister(), src_register, src_offset); + } else if (dst.IsRegisterPair()) { + CHECK_EQ(8u, size) << dst; + assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairLow(), src_register, src_offset); + assembler->LoadFromOffset(kLoadWord, dst.AsRegisterPairHigh(), src_register, src_offset + 4); + } else if (dst.IsSRegister()) { + assembler->LoadSFromOffset(dst.AsSRegister(), src_register, src_offset); + } else { + CHECK(dst.IsDRegister()) << dst; + assembler->LoadDFromOffset(dst.AsDRegister(), src_register, src_offset); + } +} + +void ArmJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { + EmitLoad(asm_.get(), m_dst, SP, src.Int32Value(), size); +} + +void ArmJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) { + EmitLoad(asm_.get(), m_dst, TR, src.Int32Value(), size); +} + +void ArmJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) { + ArmManagedRegister dst = m_dst.AsArm(); + CHECK(dst.IsCoreRegister()) << dst; + __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value()); +} + +void ArmJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + 
ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, fr_offs.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + __ AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { + __ StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value()); +} + +void ArmJNIMacroAssembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; +} + +void ArmJNIMacroAssembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; +} + +void ArmJNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t /*size*/) { + ArmManagedRegister dst = m_dst.AsArm(); + ArmManagedRegister src = m_src.AsArm(); + if (!dst.Equals(src)) { + if (dst.IsCoreRegister()) { + CHECK(src.IsCoreRegister()) << src; + __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister())); + } else if (dst.IsDRegister()) { + if (src.IsDRegister()) { + __ vmovd(dst.AsDRegister(), src.AsDRegister()); + } else { + // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi}) + CHECK(src.IsRegisterPair()) << src; + __ vmovdrr(dst.AsDRegister(), src.AsRegisterPairLow(), src.AsRegisterPairHigh()); + } + } else if (dst.IsSRegister()) { + if (src.IsSRegister()) { + __ vmovs(dst.AsSRegister(), src.AsSRegister()); + } else { + // VMOV Sn, Rn (Sn = Rn) + CHECK(src.IsCoreRegister()) << src; + __ vmovsr(dst.AsSRegister(), src.AsCoreRegister()); + } + } else { + CHECK(dst.IsRegisterPair()) << dst; + CHECK(src.IsRegisterPair()) << src; + // Ensure that the first move doesn't clobber the input of the second. 
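For the register-pair case, the ordering only matters when the source high register is also the destination low register. A small illustrative case (register names are only an example): with src = (r0, r1) and dst = (r1, r2), copying low-first would overwrite r1, the source high half, before it is read, which is why the else branch below copies the high half first.

    // src pair = {low: r0, high: r1}, dst pair = {low: r1, high: r2}
    // low-first:  r1 = r0; r2 = r1;   // r2 receives the already-clobbered value
    // high-first: r2 = r1; r1 = r0;   // both halves arrive intact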
+ if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { + __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); + __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); + } else { + __ mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh())); + __ mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow())); + } + } + } +} + +void ArmJNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister mscratch, + size_t size) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4); + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4); + } +} + +void ArmJNIMacroAssembler::Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsArm().AsCoreRegister(); + CHECK_EQ(size, 4u); + __ LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value()); + __ StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsArm().AsCoreRegister(); + CHECK_EQ(size, 4u); + __ LoadFromOffset(kLoadWord, scratch, SP, src.Int32Value()); + __ StoreToOffset(kStoreWord, + scratch, + dest_base.AsArm().AsCoreRegister(), + dest_offset.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/, + FrameOffset /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*mscratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + CHECK_EQ(size, 4u); + Register scratch = mscratch.AsArm().AsCoreRegister(); + __ LoadFromOffset(kLoadWord, scratch, src.AsArm().AsCoreRegister(), src_offset.Int32Value()); + __ StoreToOffset(kStoreWord, scratch, dest.AsArm().AsCoreRegister(), dest_offset.Int32Value()); +} + +void ArmJNIMacroAssembler::Copy(FrameOffset /*dst*/, + Offset /*dest_offset*/, + FrameOffset /*src*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { + ArmManagedRegister out_reg = mout_reg.AsArm(); + ArmManagedRegister in_reg = min_reg.AsArm(); + CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; + CHECK(out_reg.IsCoreRegister()) << out_reg; + if (null_allowed) { + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value()); + in_reg = out_reg; + } + __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); + if (!out_reg.Equals(in_reg)) { + __ it(EQ, kItElse); + __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); + } else { + __ it(NE); + } + __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); + } else { + __ AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); + } +} + +void ArmJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + if (null_allowed) { + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + __ cmp(scratch.AsCoreRegister(), ShifterOperand(0)); + __ it(NE); + __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE); + } else { + __ AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL); + } + __ StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, out_off.Int32Value()); +} + +void ArmJNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, + ManagedRegister min_reg) { + ArmManagedRegister out_reg = mout_reg.AsArm(); + ArmManagedRegister in_reg = min_reg.AsArm(); + CHECK(out_reg.IsCoreRegister()) << out_reg; + CHECK(in_reg.IsCoreRegister()) << in_reg; + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ LoadImmediate(out_reg.AsCoreRegister(), 0, EQ); // TODO: why EQ? + } + __ cmp(in_reg.AsCoreRegister(), ShifterOperand(0)); + __ it(NE); + __ LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(), in_reg.AsCoreRegister(), 0, NE); +} + +void ArmJNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void ArmJNIMacroAssembler::Call(ManagedRegister mbase, + Offset offset, + ManagedRegister mscratch) { + ArmManagedRegister base = mbase.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(base.IsCoreRegister()) << base; + CHECK(scratch.IsCoreRegister()) << scratch; + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + base.AsCoreRegister(), + offset.Int32Value()); + __ blx(scratch.AsCoreRegister()); + // TODO: place reference map on call. 
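The CreateHandleScopeEntry sequences above use an IT block to keep the computation branch-free; in C-like pseudocode the emitted code amounts to the following (a sketch, not the literal instruction sequence):

    // null_allowed:
    //   entry = (ref == 0) ? 0 : (SP + handle_scope_offset);
    // !null_allowed:
    //   entry = SP + handle_scope_offset;

Here handle_scope_offset is the frame slot holding the reference, so a non-null result is the address of that slot rather than the reference itself.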
+} + +void ArmJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + // Call *(*(SP + base) + offset) + __ LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, base.Int32Value()); + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + scratch.AsCoreRegister(), + offset.Int32Value()); + __ blx(scratch.AsCoreRegister()); + // TODO: place reference map on call +} + +void ArmJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmJNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + __ mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR)); +} + +void ArmJNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /*scratch*/) { + __ StoreToOffset(kStoreWord, TR, SP, offset.Int32Value(), AL); +} + +void ArmJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { + ArmManagedRegister scratch = mscratch.AsArm(); + ArmExceptionSlowPath* slow = new (__ GetArena()) ArmExceptionSlowPath(scratch, stack_adjust); + __ GetBuffer()->EnqueueSlowPath(slow); + __ LoadFromOffset(kLoadWord, + scratch.AsCoreRegister(), + TR, + Thread::ExceptionOffset<kArmPointerSize>().Int32Value()); + __ cmp(scratch.AsCoreRegister(), ShifterOperand(0)); + __ b(slow->Entry(), NE); +} + +std::unique_ptr<JNIMacroLabel> ArmJNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new ArmJNIMacroLabel()); +} + +void ArmJNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ b(ArmJNIMacroLabel::Cast(label)->AsArm()); +} + +void ArmJNIMacroAssembler::Jump(JNIMacroLabel* label, + JNIMacroUnaryCondition condition, + ManagedRegister test) { + CHECK(label != nullptr); + + arm::Condition arm_cond; + switch (condition) { + case JNIMacroUnaryCondition::kZero: + arm_cond = EQ; + break; + case JNIMacroUnaryCondition::kNotZero: + arm_cond = NE; + break; + default: + LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition); + UNREACHABLE(); + } + __ cmp(test.AsArm().AsCoreRegister(), ShifterOperand(0)); + __ b(ArmJNIMacroLabel::Cast(label)->AsArm(), arm_cond); +} + +void ArmJNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(ArmJNIMacroLabel::Cast(label)->AsArm()); +} + +#undef __ + +void ArmExceptionSlowPath::Emit(Assembler* sasm) { + ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm); +#define __ sp_asm-> + __ Bind(&entry_); + if (stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSizeImpl(sp_asm, stack_adjust_); + } + // Pass exception object as argument. + // Don't care about preserving R0 as this call won't return. + __ mov(R0, ShifterOperand(scratch_.AsCoreRegister())); + // Set up call to Thread::Current()->pDeliverException. 
+ __ LoadFromOffset(kLoadWord, + R12, + TR, + QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()); + __ blx(R12); +#undef __ +} + +void ArmJNIMacroAssembler::MemoryBarrier(ManagedRegister mscratch) { + CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12); + asm_->dmb(SY); +} + +} // namespace arm +} // namespace art diff --git a/compiler/utils/arm/jni_macro_assembler_arm.h b/compiler/utils/arm/jni_macro_assembler_arm.h new file mode 100644 index 0000000000..809ac8be94 --- /dev/null +++ b/compiler/utils/arm/jni_macro_assembler_arm.h @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ +#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ + +#include <memory> +#include <type_traits> +#include <vector> + +#include "arch/instruction_set.h" +#include "base/enums.h" +#include "base/macros.h" +#include "utils/jni_macro_assembler.h" +#include "utils/label.h" +#include "offsets.h" + +namespace art { +namespace arm { + +class ArmAssembler; + +class ArmJNIMacroAssembler : public JNIMacroAssembler<PointerSize::k32> { + public: + ArmJNIMacroAssembler(ArenaAllocator* arena, InstructionSet isa); + virtual ~ArmJNIMacroAssembler(); + + size_t CodeSize() const OVERRIDE; + DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE; + void FinalizeCode() OVERRIDE; + void FinalizeInstructions(const MemoryRegion& region) OVERRIDE; + + // + // Overridden common assembler high-level functionality + // + + // Emit code that will create an activation on the stack + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + + void StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + + void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + + void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE; + + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + + void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, + bool 
unpoison_reference) OVERRIDE; + + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + + // Copying routines + void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, + ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, + ManagedRegister scratch, size_t size) OVERRIDE; + + // Sign extension + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current() + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, + ManagedRegister in_reg, bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, + ManagedRegister scratch, bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset] + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + // Emit an unconditional jump to the label. 
+ void Jump(JNIMacroLabel* label) OVERRIDE; + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) OVERRIDE; + + private: + std::unique_ptr<ArmAssembler> asm_; +}; + +class ArmJNIMacroLabel FINAL : public JNIMacroLabelCommon<ArmJNIMacroLabel, art::Label, kArm> { + public: + art::Label* AsArm() { + return AsPlatformLabel(); + } +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_H_ diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc new file mode 100644 index 0000000000..d07c047253 --- /dev/null +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -0,0 +1,695 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> +#include <type_traits> + +#include "jni_macro_assembler_arm_vixl.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "thread.h" + +using namespace vixl::aarch32; // NOLINT(build/namespaces) +namespace vixl32 = vixl::aarch32; + +using vixl::ExactAssemblyScope; +using vixl::CodeBufferCheckScope; + +namespace art { +namespace arm { + +#ifdef ___ +#error "ARM Assembler macro already defined." +#else +#define ___ asm_.GetVIXLAssembler()-> +#endif + +void ArmVIXLJNIMacroAssembler::FinalizeCode() { + for (const std::unique_ptr< + ArmVIXLJNIMacroAssembler::ArmException>& exception : exception_blocks_) { + EmitExceptionPoll(exception.get()); + } + asm_.FinalizeCode(); +} + +static dwarf::Reg DWARFReg(vixl32::Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg.GetCode())); +} + +static dwarf::Reg DWARFReg(vixl32::SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode())); +} + +static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize); + +void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + CHECK_ALIGNED(frame_size, kStackAlignment); + CHECK(r0.Is(method_reg.AsArm().AsVIXLRegister())); + + // Push callee saves and link register. + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + ___ Push(RegisterList(core_spill_mask)); + cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + uint32_t first = CTZ(fp_spill_mask); + + // Check that list is contiguous. 
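The DCHECK that follows is a compact way of asserting that the set bits of fp_spill_mask form one contiguous run, which is what a single VPUSH of POPCOUNT(fp_spill_mask) registers starting at the first set bit requires. A worked example (mask values are illustrative):

    // fp_spill_mask = 0b11'1100'0000  (S6..S9)
    //   fp_spill_mask >> CTZ(fp_spill_mask)    == 0b1111
    //   ~0u >> (32 - POPCOUNT(fp_spill_mask))  == 0b1111   -> equal, contiguous
    // a gapped mask such as 0b101 gives 0b101 vs 0b011 and trips the DCHECK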
+ DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask))); + + ___ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask))); + cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi().RelOffsetForMany(DWARFReg(s0), 0, fp_spill_mask, kFramePointerSize); + } + + // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + // Must at least have space for Method*. + CHECK_GT(frame_size, pushed_values * kFramePointerSize); + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. + + // Write out Method*. + asm_.StoreToOffset(kStoreWord, r0, sp, 0); + + // Write out entry spills. + int32_t offset = frame_size + kFramePointerSize; + for (size_t i = 0; i < entry_spills.size(); ++i) { + ArmManagedRegister reg = entry_spills.at(i).AsArm(); + if (reg.IsNoRegister()) { + // only increment stack offset. + ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsCoreRegister()) { + asm_.StoreToOffset(kStoreWord, reg.AsVIXLRegister(), sp, offset); + offset += 4; + } else if (reg.IsSRegister()) { + asm_.StoreSToOffset(reg.AsVIXLSRegister(), sp, offset); + offset += 4; + } else if (reg.IsDRegister()) { + asm_.StoreDToOffset(reg.AsVIXLDRegister(), sp, offset); + offset += 8; + } + } +} + +void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + cfi().RememberState(); + + // Compute callee saves to pop and PC. + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); + } else { + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); + } + } + + // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); + CHECK_GT(frame_size, pop_values * kFramePointerSize); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. + + if (fp_spill_mask != 0) { + uint32_t first = CTZ(fp_spill_mask); + // Check that list is contiguous. + DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask))); + + ___ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask))); + cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); + cfi().RestoreMany(DWARFReg(s0), fp_spill_mask); + } + + // Pop callee saves and PC. + ___ Pop(RegisterList(core_spill_mask)); + + // The CFI should be restored for any code that follows the exit block. 
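A method can have several return paths, and more code may follow this epilogue in the emitted stream, so the pops above must not permanently change the unwind rules. Roughly, the bracketing is:

    // cfi().RememberState();           // snapshot the rules while the frame is fully live
    // ... DecreaseFrameSize / Vpop / Pop adjust the CFA and restore registers ...
    // cfi().RestoreState();            // revert to the snapshot for whatever code follows
    // cfi().DefCFAOffset(frame_size);  // CFA is again SP + frame_size past this exit block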
+ cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + + +void ArmVIXLJNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + asm_.AddConstant(sp, -adjust); + cfi().AdjustCFAOffset(adjust); +} + +void ArmVIXLJNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + asm_.AddConstant(sp, adjust); + cfi().AdjustCFAOffset(-adjust); +} + +void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) { + ArmManagedRegister src = m_src.AsArm(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsCoreRegister()) { + CHECK_EQ(4u, size); + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(src.AsVIXLRegister()); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + } else if (src.IsRegisterPair()) { + CHECK_EQ(8u, size); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegisterPairLow(), sp, dest.Int32Value()); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegisterPairHigh(), sp, dest.Int32Value() + 4); + } else if (src.IsSRegister()) { + CHECK_EQ(4u, size); + asm_.StoreSToOffset(src.AsVIXLSRegister(), sp, dest.Int32Value()); + } else { + CHECK_EQ(8u, size); + CHECK(src.IsDRegister()) << src; + asm_.StoreDToOffset(src.AsVIXLDRegister(), sp, dest.Int32Value()); + } +} + +void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(src.AsVIXLRegister()); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + ArmManagedRegister src = msrc.AsArm(); + CHECK(src.IsCoreRegister()) << src; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(src.AsVIXLRegister()); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest, + ManagedRegister msrc, + FrameOffset in_off, + ManagedRegister mscratch) { + ArmManagedRegister src = msrc.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, in_off.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, dest.Int32Value() + 4); +} + +void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, + FrameOffset src, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, dest.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) { + ArmManagedRegister dst = dest.AsArm(); + CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(dst.AsVIXLRegister(), base.AsArm().AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, + dst.AsVIXLRegister(), + base.AsArm().AsVIXLRegister(), + offs.Int32Value()); + + if (unpoison_reference) { + asm_.MaybeUnpoisonHeapReference(dst.AsVIXLRegister()); + } +} + +void 
ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::LoadRawPtr(ManagedRegister dest ATTRIBUTE_UNUSED, + ManagedRegister base ATTRIBUTE_UNUSED, + Offset offs ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, + uint32_t imm, + ManagedRegister scratch) { + ArmManagedRegister mscratch = scratch.AsArm(); + CHECK(mscratch.IsCoreRegister()) << mscratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(mscratch.AsVIXLRegister()); + asm_.LoadImmediate(mscratch.AsVIXLRegister(), imm); + asm_.StoreToOffset(kStoreWord, mscratch.AsVIXLRegister(), sp, dest.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { + return Load(m_dst.AsArm(), sp, src.Int32Value(), size); +} + +void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, + ThreadOffset32 src, + size_t size) { + return Load(m_dst.AsArm(), tr, src.Int32Value(), size); +} + +void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) { + ArmManagedRegister dst = m_dst.AsArm(); + CHECK(dst.IsCoreRegister()) << dst; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(dst.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, dst.AsVIXLRegister(), tr, offs.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, fr_offs.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED, + FrameOffset fr_offs ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + asm_.AddConstant(scratch.AsVIXLRegister(), sp, fr_offs.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { + asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; +} + +void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; +} + +void ArmVIXLJNIMacroAssembler::Move(ManagedRegister m_dst, + ManagedRegister m_src, + size_t size ATTRIBUTE_UNUSED) { + ArmManagedRegister dst = m_dst.AsArm(); + ArmManagedRegister src = m_src.AsArm(); + if (!dst.Equals(src)) { + if (dst.IsCoreRegister()) { + CHECK(src.IsCoreRegister()) << src; + UseScratchRegisterScope 
temps(asm_.GetVIXLAssembler()); + temps.Exclude(dst.AsVIXLRegister()); + ___ Mov(dst.AsVIXLRegister(), src.AsVIXLRegister()); + } else if (dst.IsDRegister()) { + if (src.IsDRegister()) { + ___ Vmov(F64, dst.AsVIXLDRegister(), src.AsVIXLDRegister()); + } else { + // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi}) + CHECK(src.IsRegisterPair()) << src; + ___ Vmov(dst.AsVIXLDRegister(), src.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairHigh()); + } + } else if (dst.IsSRegister()) { + if (src.IsSRegister()) { + ___ Vmov(F32, dst.AsVIXLSRegister(), src.AsVIXLSRegister()); + } else { + // VMOV Sn, Rn (Sn = Rn) + CHECK(src.IsCoreRegister()) << src; + ___ Vmov(dst.AsVIXLSRegister(), src.AsVIXLRegister()); + } + } else { + CHECK(dst.IsRegisterPair()) << dst; + CHECK(src.IsRegisterPair()) << src; + // Ensure that the first move doesn't clobber the input of the second. + if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { + ___ Mov(dst.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairLow()); + ___ Mov(dst.AsVIXLRegisterPairHigh(), src.AsVIXLRegisterPairHigh()); + } else { + ___ Mov(dst.AsVIXLRegisterPairHigh(), src.AsVIXLRegisterPairHigh()); + ___ Mov(dst.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairLow()); + } + } + } +} + +void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister scratch, + size_t size) { + ArmManagedRegister temp = scratch.AsArm(); + CHECK(temp.IsCoreRegister()) << temp; + CHECK(size == 4 || size == 8) << size; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(temp.AsVIXLRegister()); + if (size == 4) { + asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value()); + } else if (size == 8) { + asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value()); + asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value() + 4); + asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value() + 4); + } +} + +void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + ManagedRegister src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest_base ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED, + FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + ManagedRegister src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void 
ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { + ArmManagedRegister out_reg = mout_reg.AsArm(); + ArmManagedRegister in_reg = min_reg.AsArm(); + CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; + CHECK(out_reg.IsCoreRegister()) << out_reg; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(out_reg.AsVIXLRegister()); + if (null_allowed) { + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + asm_.LoadFromOffset(kLoadWord, + out_reg.AsVIXLRegister(), + sp, + handle_scope_offset.Int32Value()); + in_reg = out_reg; + } + + temps.Exclude(in_reg.AsVIXLRegister()); + ___ Cmp(in_reg.AsVIXLRegister(), 0); + + if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { + if (!out_reg.Equals(in_reg)) { + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 3 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + ___ it(eq, 0xc); + ___ mov(eq, out_reg.AsVIXLRegister(), 0); + asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + } else { + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 2 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + ___ it(ne, 0x8); + asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + } + } else { + // TODO: Implement this (old arm assembler would have crashed here). + UNIMPLEMENTED(FATAL); + } + } else { + asm_.AddConstant(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + } +} + +void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + if (null_allowed) { + asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + ___ Cmp(scratch.AsVIXLRegister(), 0); + + if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + 2 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + ___ it(ne, 0x8); + asm_.AddConstantInIt(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + } else { + // TODO: Implement this (old arm assembler would have crashed here). + UNIMPLEMENTED(FATAL); + } + } else { + asm_.AddConstant(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + } + asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, out_off.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::LoadReferenceFromHandleScope( + ManagedRegister mout_reg ATTRIBUTE_UNUSED, + ManagedRegister min_reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { + // TODO: not validating references. 
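Throughout this file the VIXL macro assembler is allowed to synthesize its own temporaries, so the recurring UseScratchRegisterScope/Exclude pairing removes the registers the JNI assembler is actively using from that temp pool before emitting. The shape is roughly the following (reg and offset stand for whatever register and offset the surrounding routine works on):

    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    temps.Exclude(reg);  // reg can no longer be handed out as a VIXL-internal scratch
    asm_.LoadFromOffset(kLoadWord, reg, sp, offset);  // macro expansion will not clobber reg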
+} + +void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { + // TODO: not validating references. +} + +void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, + Offset offset, + ManagedRegister mscratch) { + ArmManagedRegister base = mbase.AsArm(); + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(base.IsCoreRegister()) << base; + CHECK(scratch.IsCoreRegister()) << scratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, + scratch.AsVIXLRegister(), + base.AsVIXLRegister(), + offset.Int32Value()); + ___ Blx(scratch.AsVIXLRegister()); + // TODO: place reference map on call. +} + +void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { + ArmManagedRegister scratch = mscratch.AsArm(); + CHECK(scratch.IsCoreRegister()) << scratch; + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + // Call *(*(SP + base) + offset) + asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, base.Int32Value()); + asm_.LoadFromOffset(kLoadWord, + scratch.AsVIXLRegister(), + scratch.AsVIXLRegister(), + offset.Int32Value()); + ___ Blx(scratch.AsVIXLRegister()); + // TODO: place reference map on call +} + +void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister mtr) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(mtr.AsArm().AsVIXLRegister()); + ___ Mov(mtr.AsArm().AsVIXLRegister(), tr); +} + +void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value()); +} + +void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { + CHECK_ALIGNED(stack_adjust, kStackAlignment); + ArmManagedRegister scratch = m_scratch.AsArm(); + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(scratch.AsVIXLRegister()); + exception_blocks_.emplace_back( + new ArmVIXLJNIMacroAssembler::ArmException(scratch, stack_adjust)); + asm_.LoadFromOffset(kLoadWord, + scratch.AsVIXLRegister(), + tr, + Thread::ExceptionOffset<kArmPointerSize>().Int32Value()); + + ___ Cmp(scratch.AsVIXLRegister(), 0); + { + ExactAssemblyScope guard(asm_.GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + vixl32::Label* label = exception_blocks_.back()->Entry(); + ___ b(ne, Narrow, label); + ___ AddBranchLabel(label); + } + // TODO: think about using CBNZ here. 
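Worth noting: ExceptionPoll above only emits the poll itself (load Thread::exception_, CMP against zero, conditional branch); the out-of-line handler is queued in exception_blocks_ and emitted later when FinalizeCode runs EmitExceptionPoll for each entry. A loose model of that "record now, emit at the end" shape, with std::function standing in for the queued ArmException blocks (names and the textual output are illustrative only, not ART code):

#include <functional>
#include <string>
#include <vector>

class TinyEmitter {
 public:
  void EmitPoll(const std::string& label) {
    code_ += "ldr scratch, [tr, #exception_offset]\n";
    code_ += "cmp scratch, #0\n";
    code_ += "bne " + label + "\n";            // branch to the slow path
    slow_paths_.push_back([this, label]() {    // deferred, like exception_blocks_
      code_ += label + ":\nmov r0, scratch\nblx pDeliverException\n";
    });
  }
  void Finalize() {                            // mirrors FinalizeCode()
    for (const auto& emit : slow_paths_) emit();
    slow_paths_.clear();
  }
  const std::string& code() const { return code_; }

 private:
  std::string code_;
  std::vector<std::function<void()>> slow_paths_;
};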
+} + +std::unique_ptr<JNIMacroLabel> ArmVIXLJNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new ArmVIXLJNIMacroLabel()); +} + +void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); +} + +void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label, + JNIMacroUnaryCondition condition, + ManagedRegister test) { + CHECK(label != nullptr); + + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(test.AsArm().AsVIXLRegister()); + switch (condition) { + case JNIMacroUnaryCondition::kZero: + ___ CompareAndBranchIfZero(test.AsArm().AsVIXLRegister(), + ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); + break; + case JNIMacroUnaryCondition::kNotZero: + ___ CompareAndBranchIfNonZero(test.AsArm().AsVIXLRegister(), + ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition); + UNREACHABLE(); + } +} + +void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); +} + +void ArmVIXLJNIMacroAssembler::EmitExceptionPoll( + ArmVIXLJNIMacroAssembler::ArmException* exception) { + ___ Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); + } + + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(exception->scratch_.AsVIXLRegister()); + // Pass exception object as argument. + // Don't care about preserving r0 as this won't return. + ___ Mov(r0, exception->scratch_.AsVIXLRegister()); + temps.Include(exception->scratch_.AsVIXLRegister()); + // TODO: check that exception->scratch_ is dead by this point. 
+ vixl32::Register temp = temps.Acquire(); + ___ Ldr(temp, + MemOperand(tr, + QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value())); + ___ Blx(temp); +} + +void ArmVIXLJNIMacroAssembler::MemoryBarrier(ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL); +} + +void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister + dest, + vixl32::Register base, + int32_t offset, + size_t size) { + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size) << dest; + } else if (dest.IsCoreRegister()) { + CHECK(!dest.AsVIXLRegister().Is(sp)) << dest; + + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(dest.AsVIXLRegister()); + + if (size == 1u) { + ___ Ldrb(dest.AsVIXLRegister(), MemOperand(base, offset)); + } else { + CHECK_EQ(4u, size) << dest; + ___ Ldr(dest.AsVIXLRegister(), MemOperand(base, offset)); + } + } else if (dest.IsRegisterPair()) { + CHECK_EQ(8u, size) << dest; + ___ Ldr(dest.AsVIXLRegisterPairLow(), MemOperand(base, offset)); + ___ Ldr(dest.AsVIXLRegisterPairHigh(), MemOperand(base, offset + 4)); + } else if (dest.IsSRegister()) { + ___ Vldr(dest.AsVIXLSRegister(), MemOperand(base, offset)); + } else { + CHECK(dest.IsDRegister()) << dest; + ___ Vldr(dest.AsVIXLDRegister(), MemOperand(base, offset)); + } +} + +} // namespace arm +} // namespace art diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h new file mode 100644 index 0000000000..f3baf1f062 --- /dev/null +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_VIXL_H_ +#define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_VIXL_H_ + +#include "base/arena_containers.h" +#include "base/logging.h" +#include "constants_arm.h" +#include "offsets.h" +#include "utils/arm/assembler_arm_shared.h" +#include "utils/arm/assembler_arm_vixl.h" +#include "utils/arm/managed_register_arm.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +namespace art { +namespace arm { + +class ArmVIXLJNIMacroAssembler FINAL + : public JNIMacroAssemblerFwd<ArmVIXLAssembler, PointerSize::k32> { + private: + class ArmException; + public: + explicit ArmVIXLJNIMacroAssembler(ArenaAllocator* arena) + : JNIMacroAssemblerFwd(arena), + exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {} + + virtual ~ArmVIXLJNIMacroAssembler() {} + void FinalizeCode() OVERRIDE; + + // + // Overridden common assembler high-level functionality + // + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack. 
+ void RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + + void StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + + void StoreSpanning(FrameOffset dest, + ManagedRegister src, + FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines. + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + + void LoadFromThread(ManagedRegister dest, + ThreadOffset32 src, + size_t size) OVERRIDE; + + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + + void LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) OVERRIDE; + + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + + // Copying routines. + void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current(). + void GetCurrentThread(ManagedRegister mtr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, + ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. 
+ void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister scratch, + bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst. + void LoadReferenceFromHandleScope(ManagedRegister dst, + ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset]. + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust); + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) OVERRIDE; + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) OVERRIDE; + + void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + + void EmitExceptionPoll(ArmVIXLJNIMacroAssembler::ArmException *exception); + void Load(ArmManagedRegister dest, vixl32::Register base, int32_t offset, size_t size); + + private: + class ArmException { + private: + ArmException(ArmManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + vixl32::Label* Entry() { return &exception_entry_; } + + // Register used for passing Thread::Current()->exception_ . + const ArmManagedRegister scratch_; + + // Stack adjust for ExceptionPool. + const size_t stack_adjust_; + + vixl32::Label exception_entry_; + + friend class ArmVIXLJNIMacroAssembler; + DISALLOW_COPY_AND_ASSIGN(ArmException); + }; + + // List of exception blocks to generate at the end of the code cache. + ArenaVector<std::unique_ptr<ArmVIXLJNIMacroAssembler::ArmException>> exception_blocks_; + // Used for testing. + friend class ArmVIXLAssemblerTest_VixlLoadFromOffset_Test; + friend class ArmVIXLAssemblerTest_VixlStoreToOffset_Test; +}; + +class ArmVIXLJNIMacroLabel FINAL + : public JNIMacroLabelCommon<ArmVIXLJNIMacroLabel, + vixl32::Label, + kArm> { + public: + vixl32::Label* AsArm() { + return AsPlatformLabel(); + } +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_VIXL_H_ diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index 5b84058f7f..2be2d5638e 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -22,6 +22,12 @@ #include "debug/dwarf/register.h" #include "utils/managed_register.h" +// TODO(VIXL): Make VIXL compile with -Wshadow. 
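The managed_register_arm.h hunk just below wraps the VIXL aarch32 include in a diagnostic push/ignored/pop sandwich, so -Wshadow stays enabled for ART's own code while the third-party header compiles cleanly, as the TODO(VIXL) comment above notes. The same pattern in isolation (the include path is a placeholder, not a real ART header):

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "third_party/noisy_header.h"   // placeholder include
#pragma GCC diagnostic pop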
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch32/macro-assembler-aarch32.h" +#pragma GCC diagnostic pop + namespace art { namespace arm { @@ -85,34 +91,49 @@ const int kNumberOfAllocIds = // There is a one-to-one mapping between ManagedRegister and register id. class ArmManagedRegister : public ManagedRegister { public: - Register AsCoreRegister() const { + constexpr Register AsCoreRegister() const { CHECK(IsCoreRegister()); return static_cast<Register>(id_); } - SRegister AsSRegister() const { + vixl::aarch32::Register AsVIXLRegister() const { + CHECK(IsCoreRegister()); + return vixl::aarch32::Register(id_); + } + + constexpr SRegister AsSRegister() const { CHECK(IsSRegister()); return static_cast<SRegister>(id_ - kNumberOfCoreRegIds); } - DRegister AsDRegister() const { + vixl::aarch32::SRegister AsVIXLSRegister() const { + CHECK(IsSRegister()); + return vixl::aarch32::SRegister(id_ - kNumberOfCoreRegIds); + } + + constexpr DRegister AsDRegister() const { CHECK(IsDRegister()); return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds); } - SRegister AsOverlappingDRegisterLow() const { + vixl::aarch32::DRegister AsVIXLDRegister() const { + CHECK(IsDRegister()); + return vixl::aarch32::DRegister(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds); + } + + constexpr SRegister AsOverlappingDRegisterLow() const { CHECK(IsOverlappingDRegister()); DRegister d_reg = AsDRegister(); return static_cast<SRegister>(d_reg * 2); } - SRegister AsOverlappingDRegisterHigh() const { + constexpr SRegister AsOverlappingDRegisterHigh() const { CHECK(IsOverlappingDRegister()); DRegister d_reg = AsDRegister(); return static_cast<SRegister>(d_reg * 2 + 1); } - RegisterPair AsRegisterPair() const { + constexpr RegisterPair AsRegisterPair() const { CHECK(IsRegisterPair()); Register reg_low = AsRegisterPairLow(); if (reg_low == R1) { @@ -122,50 +143,58 @@ class ArmManagedRegister : public ManagedRegister { } } - Register AsRegisterPairLow() const { + constexpr Register AsRegisterPairLow() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdLow(). return FromRegId(AllocIdLow()).AsCoreRegister(); } - Register AsRegisterPairHigh() const { + vixl::aarch32::Register AsVIXLRegisterPairLow() const { + return vixl::aarch32::Register(AsRegisterPairLow()); + } + + constexpr Register AsRegisterPairHigh() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdHigh(). return FromRegId(AllocIdHigh()).AsCoreRegister(); } - bool IsCoreRegister() const { + vixl::aarch32::Register AsVIXLRegisterPairHigh() const { + return vixl::aarch32::Register(AsRegisterPairHigh()); + } + + constexpr bool IsCoreRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfCoreRegIds); } - bool IsSRegister() const { + constexpr bool IsSRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfCoreRegIds; return (0 <= test) && (test < kNumberOfSRegIds); } - bool IsDRegister() const { + constexpr bool IsDRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds); return (0 <= test) && (test < kNumberOfDRegIds); } // Returns true if this DRegister overlaps SRegisters. 
- bool IsOverlappingDRegister() const { + constexpr bool IsOverlappingDRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds); return (0 <= test) && (test < kNumberOfOverlappingDRegIds); } - bool IsRegisterPair() const { + constexpr bool IsRegisterPair() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds); return (0 <= test) && (test < kNumberOfPairRegIds); } - bool IsSameType(ArmManagedRegister test) const { + constexpr bool IsSameType(ArmManagedRegister test) const { CHECK(IsValidManagedRegister() && test.IsValidManagedRegister()); return (IsCoreRegister() && test.IsCoreRegister()) || @@ -182,29 +211,29 @@ class ArmManagedRegister : public ManagedRegister { void Print(std::ostream& os) const; - static ArmManagedRegister FromCoreRegister(Register r) { + static constexpr ArmManagedRegister FromCoreRegister(Register r) { CHECK_NE(r, kNoRegister); return FromRegId(r); } - static ArmManagedRegister FromSRegister(SRegister r) { + static constexpr ArmManagedRegister FromSRegister(SRegister r) { CHECK_NE(r, kNoSRegister); return FromRegId(r + kNumberOfCoreRegIds); } - static ArmManagedRegister FromDRegister(DRegister r) { + static constexpr ArmManagedRegister FromDRegister(DRegister r) { CHECK_NE(r, kNoDRegister); return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds)); } - static ArmManagedRegister FromRegisterPair(RegisterPair r) { + static constexpr ArmManagedRegister FromRegisterPair(RegisterPair r) { CHECK_NE(r, kNoRegisterPair); return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfSRegIds + kNumberOfDRegIds)); } // Return a RegisterPair consisting of Register r_low and r_low + 1. - static ArmManagedRegister FromCoreRegisterPair(Register r_low) { + static constexpr ArmManagedRegister FromCoreRegisterPair(Register r_low) { if (r_low != R1) { // not the dalvik special case CHECK_NE(r_low, kNoRegister); CHECK_EQ(0, (r_low % 2)); @@ -217,7 +246,7 @@ class ArmManagedRegister : public ManagedRegister { } // Return a DRegister overlapping SRegister r_low and r_low + 1. 
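For reference, the constexpr accessors above all work on one flat register-id space (core registers first, then S, D and pair ids), with each As*/Is* helper range-checking or subtracting the base of its slice. A standalone model of that packing (the counts are assumptions mirroring kNumberOfCoreRegIds and friends, not copied from ART):

#include <cassert>

constexpr int kNumCore = 16;
constexpr int kNumS = 32;
constexpr int kNumD = 32;

constexpr bool IsCore(int id) { return 0 <= id && id < kNumCore; }
constexpr bool IsS(int id)    { return kNumCore <= id && id < kNumCore + kNumS; }
constexpr bool IsD(int id)    { return kNumCore + kNumS <= id && id < kNumCore + kNumS + kNumD; }

constexpr int AsS(int id) { return id - kNumCore; }           // like AsSRegister()
constexpr int AsD(int id) { return id - kNumCore - kNumS; }   // like AsDRegister()

static_assert(IsS(kNumCore), "the first S id follows the last core id");
static_assert(AsD(kNumCore + kNumS + 3) == 3, "D ids are offset by the core and S ranges");

int main() {
  assert(IsCore(5) && !IsS(5));
  return 0;
}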
- static ArmManagedRegister FromSRegisterPair(SRegister r_low) { + static constexpr ArmManagedRegister FromSRegisterPair(SRegister r_low) { CHECK_NE(r_low, kNoSRegister); CHECK_EQ(0, (r_low % 2)); const int r = r_low / 2; @@ -226,7 +255,7 @@ class ArmManagedRegister : public ManagedRegister { } private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } @@ -251,9 +280,9 @@ class ArmManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr ArmManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static ArmManagedRegister FromRegId(int reg_id) { + static constexpr ArmManagedRegister FromRegId(int reg_id) { ArmManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -264,7 +293,7 @@ std::ostream& operator<<(std::ostream& os, const ArmManagedRegister& reg); } // namespace arm -inline arm::ArmManagedRegister ManagedRegister::AsArm() const { +constexpr inline arm::ArmManagedRegister ManagedRegister::AsArm() const { arm::ArmManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg; diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index eb5112b464..6ed0e9b670 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -20,7 +20,7 @@ #include "offsets.h" #include "thread.h" -using namespace vixl; // NOLINT(build/namespaces) +using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { @@ -28,627 +28,71 @@ namespace arm64 { #ifdef ___ #error "ARM64 Assembler macro already defined." #else -#define ___ vixl_masm_-> +#define ___ vixl_masm_. #endif void Arm64Assembler::FinalizeCode() { - for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) { - EmitExceptionPoll(exception.get()); - } ___ FinalizeCode(); } size_t Arm64Assembler::CodeSize() const { - return vixl_masm_->BufferCapacity() - vixl_masm_->RemainingBufferSpace(); + return vixl_masm_.GetSizeOfCodeGenerated(); } const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { - return vixl_masm_->GetStartAddress<uint8_t*>(); + return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. - MemoryRegion from(vixl_masm_->GetStartAddress<void*>(), CodeSize()); + MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); } -void Arm64Assembler::GetCurrentThread(ManagedRegister tr) { - ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR)); -} - -void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) { - StoreToOffset(TR, SP, offset.Int32Value()); -} - -// See Arm64 PCS Section 5.2.2.1. -void Arm64Assembler::IncreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant(SP, -adjust); - cfi().AdjustCFAOffset(adjust); -} - -// See Arm64 PCS Section 5.2.2.1. 
-void Arm64Assembler::DecreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - AddConstant(SP, adjust); - cfi().AdjustCFAOffset(-adjust); -} - -void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) { - AddConstant(rd, rd, value, cond); -} - -void Arm64Assembler::AddConstant(XRegister rd, XRegister rn, int32_t value, - Condition cond) { - if ((cond == al) || (cond == nv)) { - // VIXL macro-assembler handles all variants. - ___ Add(reg_x(rd), reg_x(rn), value); - } else { - // temp = rd + value - // rd = cond ? temp : rn - vixl::UseScratchRegisterScope temps(vixl_masm_); - temps.Exclude(reg_x(rd), reg_x(rn)); - vixl::Register temp = temps.AcquireX(); - ___ Add(temp, reg_x(rn), value); - ___ Csel(reg_x(rd), temp, reg_x(rd), cond); - } -} - -void Arm64Assembler::StoreWToOffset(StoreOperandType type, WRegister source, - XRegister base, int32_t offset) { - switch (type) { - case kStoreByte: - ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - case kStoreHalfword: - ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - case kStoreWord: - ___ Str(reg_w(source), MEM_OP(reg_x(base), offset)); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } -} - -void Arm64Assembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) { - CHECK_NE(source, SP); - ___ Str(reg_x(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) { - ___ Str(reg_s(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) { - ___ Str(reg_d(source), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { - Arm64ManagedRegister src = m_src.AsArm64(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsWRegister()) { - CHECK_EQ(4u, size); - StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value()); - } else if (src.IsXRegister()) { - CHECK_EQ(8u, size); - StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); - } else if (src.IsSRegister()) { - StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value()); - } else { - CHECK(src.IsDRegister()) << src; - StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value()); - } -} - -void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) { - Arm64ManagedRegister src = m_src.AsArm64(); - CHECK(src.IsXRegister()) << src; - StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP, - offs.Int32Value()); -} - -void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { - Arm64ManagedRegister src = m_src.AsArm64(); - CHECK(src.IsXRegister()) << src; - StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); -} - -void Arm64Assembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadImmediate(scratch.AsXRegister(), imm); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, - offs.Int32Value()); -} - -void Arm64Assembler::StoreImmediateToThread64(ThreadOffset<8> offs, uint32_t imm, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadImmediate(scratch.AsXRegister(), imm); - StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value()); -} - -void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs, - FrameOffset 
fr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); -} - -void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) { - vixl::UseScratchRegisterScope temps(vixl_masm_); - vixl::Register temp = temps.AcquireX(); - ___ Mov(temp, reg_x(SP)); - ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); -} - -void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source, - FrameOffset in_off, ManagedRegister m_scratch) { - Arm64ManagedRegister source = m_source.AsArm64(); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value()); - LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8); -} - -// Load routines. -void Arm64Assembler::LoadImmediate(XRegister dest, int32_t value, - Condition cond) { - if ((cond == al) || (cond == nv)) { - ___ Mov(reg_x(dest), value); - } else { - // temp = value - // rd = cond ? temp : rd - if (value != 0) { - vixl::UseScratchRegisterScope temps(vixl_masm_); - temps.Exclude(reg_x(dest)); - vixl::Register temp = temps.AcquireX(); - ___ Mov(temp, value); - ___ Csel(reg_x(dest), temp, reg_x(dest), cond); - } else { - ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond); - } - } -} - -void Arm64Assembler::LoadWFromOffset(LoadOperandType type, WRegister dest, - XRegister base, int32_t offset) { - switch (type) { - case kLoadSignedByte: - ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadSignedHalfword: - ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadUnsignedByte: - ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadUnsignedHalfword: - ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - case kLoadWord: - ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset)); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } -} - -// Note: We can extend this member by adding load type info - see -// sign extended A64 load variants. 
-void Arm64Assembler::LoadFromOffset(XRegister dest, XRegister base, - int32_t offset) { - CHECK_NE(dest, SP); - ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::LoadSFromOffset(SRegister dest, XRegister base, - int32_t offset) { - ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::LoadDFromOffset(DRegister dest, XRegister base, - int32_t offset) { - ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset)); -} - -void Arm64Assembler::Load(Arm64ManagedRegister dest, XRegister base, - int32_t offset, size_t size) { - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size) << dest; - } else if (dest.IsWRegister()) { - CHECK_EQ(4u, size) << dest; - ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset)); - } else if (dest.IsXRegister()) { - CHECK_NE(dest.AsXRegister(), SP) << dest; - if (size == 4u) { - ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset)); - } else { - CHECK_EQ(8u, size) << dest; - ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset)); - } - } else if (dest.IsSRegister()) { - ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset)); - } else { - CHECK(dest.IsDRegister()) << dest; - ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset)); - } -} - -void Arm64Assembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { - return Load(m_dst.AsArm64(), SP, src.Int32Value(), size); -} - -void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) { - return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); -} - -void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - CHECK(dst.IsXRegister()) << dst; - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value()); -} - -void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs, - bool unpoison_reference) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister base = m_base.AsArm64(); - CHECK(dst.IsXRegister() && base.IsXRegister()); - LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(), - offs.Int32Value()); - if (unpoison_reference) { - WRegister ref_reg = dst.AsOverlappingWRegister(); - MaybeUnpoisonHeapReference(reg_w(ref_reg)); - } -} - void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) { Arm64ManagedRegister dst = m_dst.AsArm64(); Arm64ManagedRegister base = m_base.AsArm64(); CHECK(dst.IsXRegister() && base.IsXRegister()); // Remove dst and base form the temp list - higher level API uses IP1, IP0. - vixl::UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister())); ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); } -void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - CHECK(dst.IsXRegister()) << dst; - LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value()); -} - -// Copying routines. 
-void Arm64Assembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) { - Arm64ManagedRegister dst = m_dst.AsArm64(); - Arm64ManagedRegister src = m_src.AsArm64(); - if (!dst.Equals(src)) { - if (dst.IsXRegister()) { - if (size == 4) { - CHECK(src.IsWRegister()); - ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister())); - } else { - if (src.IsXRegister()) { - ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister())); - } else { - ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister())); - } - } - } else if (dst.IsWRegister()) { - CHECK(src.IsWRegister()) << src; - ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister())); - } else if (dst.IsSRegister()) { - CHECK(src.IsSRegister()) << src; - ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister())); - } else { - CHECK(dst.IsDRegister()) << dst; - CHECK(src.IsDRegister()) << src; - ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister())); - } - } -} - -void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> tr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); -} - -void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset<8> tr_offs, - FrameOffset fr_offs, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); - StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); -} - -void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), - SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), - SP, dest.Int32Value()); -} - -void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = src_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() 
of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(ManagedRegister m_dest_base, Offset dest_offs, FrameOffset src, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister base = m_dest_base.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(), - dest_offs.Int32Value()); - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); - StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset /*dst*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64Assembler::Copy(ManagedRegister m_dest, Offset dest_offset, - ManagedRegister m_src, Offset src_offset, - ManagedRegister m_scratch, size_t size) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - Arm64ManagedRegister src = m_src.AsArm64(); - Arm64ManagedRegister dest = m_dest.AsArm64(); - CHECK(dest.IsXRegister()) << dest; - CHECK(src.IsXRegister()) << src; - CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; - CHECK(size == 4 || size == 8) << size; - if (size == 4) { - if (scratch.IsWRegister()) { - LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } else { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(), - src_offset.Int32Value()); - StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(), - dest_offset.Int32Value()); - } - } else if (size == 8) { - LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value()); - StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value()); - } else { - UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; - } -} - -void Arm64Assembler::Copy(FrameOffset /*dst*/, Offset /*dest_offset*/, - FrameOffset /*src*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; -} - -void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) { - // TODO: Should we check that m_scratch is IP? - see arm. 
- ___ Dmb(vixl::InnerShareable, vixl::BarrierAll); -} - -void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) { - Arm64ManagedRegister reg = mreg.AsArm64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsWRegister()) << reg; - if (size == 1) { - ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } else { - ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } -} - -void Arm64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) { - Arm64ManagedRegister reg = mreg.AsArm64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsWRegister()) << reg; - if (size == 1) { - ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } else { - ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); - } -} - -void Arm64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void Arm64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references. -} - -void Arm64Assembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { - Arm64ManagedRegister base = m_base.AsArm64(); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(base.IsXRegister()) << base; - CHECK(scratch.IsXRegister()) << scratch; - LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value()); - ___ Blr(reg_x(scratch.AsXRegister())); -} - void Arm64Assembler::JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { Arm64ManagedRegister base = m_base.AsArm64(); Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(base.IsXRegister()) << base; CHECK(scratch.IsXRegister()) << scratch; // Remove base and scratch form the temp list - higher level API uses IP1, IP0. - vixl::UseScratchRegisterScope temps(vixl_masm_); + UseScratchRegisterScope temps(&vixl_masm_); temps.Exclude(reg_x(base.AsXRegister()), reg_x(scratch.AsXRegister())); ___ Ldr(reg_x(scratch.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); ___ Br(reg_x(scratch.AsXRegister())); } -void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - // Call *(*(SP + base) + offset) - LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value()); - LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value()); - ___ Blr(reg_x(scratch.AsXRegister())); -} - -void Arm64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*scratch*/) { - UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant"; -} - -void Arm64Assembler::CreateHandleScopeEntry( - ManagedRegister m_out_reg, FrameOffset handle_scope_offs, ManagedRegister m_in_reg, - bool null_allowed) { - Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); - Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); - // For now we only hold stale handle scope entries in x registers. - CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg; - CHECK(out_reg.IsXRegister()) << out_reg; - if (null_allowed) { - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) - if (in_reg.IsNoRegister()) { - LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP, - handle_scope_offs.Int32Value()); - in_reg = out_reg; - } - ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0); - if (!out_reg.Equals(in_reg)) { - LoadImmediate(out_reg.AsXRegister(), 0, eq); - } - AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne); - } else { - AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al); - } -} - -void Arm64Assembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset, - ManagedRegister m_scratch, bool null_allowed) { - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - CHECK(scratch.IsXRegister()) << scratch; - if (null_allowed) { - LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, - handle_scope_offset.Int32Value()); - // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is - // the address in the handle scope holding the reference. - // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0); - // Move this logic in add constants with flags. - AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne); - } else { - AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al); - } - StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value()); -} - -void Arm64Assembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg, - ManagedRegister m_in_reg) { - Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); - Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); - CHECK(out_reg.IsXRegister()) << out_reg; - CHECK(in_reg.IsXRegister()) << in_reg; - vixl::Label exit; - if (!out_reg.Equals(in_reg)) { - // FIXME: Who sets the flags here? - LoadImmediate(out_reg.AsXRegister(), 0, eq); - } - ___ Cbz(reg_x(in_reg.AsXRegister()), &exit); - LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0); - ___ Bind(&exit); -} - -void Arm64Assembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { - CHECK_ALIGNED(stack_adjust, kStackAlignment); - Arm64ManagedRegister scratch = m_scratch.AsArm64(); - exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust)); - LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value()); - ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry()); -} - -void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { - vixl::UseScratchRegisterScope temps(vixl_masm_); - temps.Exclude(reg_x(exception->scratch_.AsXRegister())); - vixl::Register temp = temps.AcquireX(); - - // Bind exception poll entry. - ___ Bind(exception->Entry()); - if (exception->stack_adjust_ != 0) { // Fix up the frame. - DecreaseFrameSize(exception->stack_adjust_); - } - // Pass exception object as argument. - // Don't care about preserving X0 as this won't return. - ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister())); - ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value())); - - ___ Blr(temp); - // Call should never return. - ___ Brk(); -} - static inline dwarf::Reg DWARFReg(CPURegister reg) { if (reg.IsFPRegister()) { - return dwarf::Reg::Arm64Fp(reg.code()); + return dwarf::Reg::Arm64Fp(reg.GetCode()); } else { - DCHECK_LT(reg.code(), 31u); // X0 - X30. - return dwarf::Reg::Arm64Core(reg.code()); + DCHECK_LT(reg.GetCode(), 31u); // X0 - X30. 
+ return dwarf::Reg::Arm64Core(reg.GetCode()); } } -void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - const Register sp = vixl_masm_->StackPointer(); - while (registers.Count() >= 2) { +void Arm64Assembler::SpillRegisters(CPURegList registers, int offset) { + int size = registers.GetRegisterSizeInBytes(); + const Register sp = vixl_masm_.StackPointer(); + // Since we are operating on register pairs, we would like to align on + // double the standard size; on the other hand, we don't want to insert + // an extra store, which will happen if the number of registers is even. + if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Str(dst0, MemOperand(sp, offset)); + cfi_.RelOffset(DWARFReg(dst0), offset); + offset += size; + } + while (registers.GetCount() >= 2) { const CPURegister& dst0 = registers.PopLowestIndex(); const CPURegister& dst1 = registers.PopLowestIndex(); ___ Stp(dst0, dst1, MemOperand(sp, offset)); @@ -664,10 +108,17 @@ void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) { DCHECK(registers.IsEmpty()); } -void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - const Register sp = vixl_masm_->StackPointer(); - while (registers.Count() >= 2) { +void Arm64Assembler::UnspillRegisters(CPURegList registers, int offset) { + int size = registers.GetRegisterSizeInBytes(); + const Register sp = vixl_masm_.StackPointer(); + // Be consistent with the logic for spilling registers. + if (!IsAlignedParam(offset, 2 * size) && registers.GetCount() % 2 != 0) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Ldr(dst0, MemOperand(sp, offset)); + cfi_.Restore(DWARFReg(dst0)); + offset += size; + } + while (registers.GetCount() >= 2) { const CPURegister& dst0 = registers.PopLowestIndex(); const CPURegister& dst1 = registers.PopLowestIndex(); ___ Ldp(dst0, dst1, MemOperand(sp, offset)); @@ -683,117 +134,25 @@ void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) { DCHECK(registers.IsEmpty()); } -void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) { - // Setup VIXL CPURegList for callee-saves. - CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); - CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); - for (auto r : callee_save_regs) { - Arm64ManagedRegister reg = r.AsArm64(); - if (reg.IsXRegister()) { - core_reg_list.Combine(reg_x(reg.AsXRegister()).code()); - } else { - DCHECK(reg.IsDRegister()); - fp_reg_list.Combine(reg_d(reg.AsDRegister()).code()); - } - } - size_t core_reg_size = core_reg_list.TotalSizeInBytes(); - size_t fp_reg_size = fp_reg_list.TotalSizeInBytes(); - - // Increase frame to required size. - DCHECK_ALIGNED(frame_size, kStackAlignment); - DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize); - IncreaseFrameSize(frame_size); - - // Save callee-saves. 
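The new SpillRegisters/UnspillRegisters logic above emits one lone STR (or LDR) first whenever the starting offset is not aligned to twice the register size and the register count is odd; that realigns the offset so every following STP/LDP lands on a naturally aligned pair slot, without inserting an extra single store when the count is even. A small model of the resulting store plan (a sketch of the same decision, not ART code):

#include <cstdio>
#include <vector>

std::vector<const char*> PlanSpills(int count, int offset, int size) {
  std::vector<const char*> plan;
  if ((offset % (2 * size)) != 0 && (count % 2) != 0) {
    plan.push_back("str");       // single store fixes the alignment
    --count;
    offset += size;
  }
  while (count >= 2) {
    plan.push_back("stp");       // pairs now land on 2*size-aligned offsets
    count -= 2;
    offset += 2 * size;
  }
  if (count == 1) {
    plan.push_back("str");       // odd register left over
  }
  return plan;
}

int main() {
  for (const char* op : PlanSpills(/*count=*/5, /*offset=*/8, /*size=*/8)) {
    std::printf("%s\n", op);     // prints: str, stp, stp
  }
  return 0;
}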
- SpillRegisters(core_reg_list, frame_size - core_reg_size); - SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - - DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); - - // Write ArtMethod* - DCHECK(X0 == method_reg.AsArm64().AsXRegister()); - StoreToOffset(X0, SP, 0); - - // Write out entry spills - int32_t offset = frame_size + kArm64PointerSize; - for (size_t i = 0; i < entry_spills.size(); ++i) { - Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); - if (reg.IsNoRegister()) { - // only increment stack offset. - ManagedRegisterSpill spill = entry_spills.at(i); - offset += spill.getSize(); - } else if (reg.IsXRegister()) { - StoreToOffset(reg.AsXRegister(), SP, offset); - offset += 8; - } else if (reg.IsWRegister()) { - StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset); - offset += 4; - } else if (reg.IsDRegister()) { - StoreDToOffset(reg.AsDRegister(), SP, offset); - offset += 8; - } else if (reg.IsSRegister()) { - StoreSToOffset(reg.AsSRegister(), SP, offset); - offset += 4; - } - } -} - -void Arm64Assembler::RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& callee_save_regs) { - // Setup VIXL CPURegList for callee-saves. - CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); - CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); - for (auto r : callee_save_regs) { - Arm64ManagedRegister reg = r.AsArm64(); - if (reg.IsXRegister()) { - core_reg_list.Combine(reg_x(reg.AsXRegister()).code()); - } else { - DCHECK(reg.IsDRegister()); - fp_reg_list.Combine(reg_d(reg.AsDRegister()).code()); - } - } - size_t core_reg_size = core_reg_list.TotalSizeInBytes(); - size_t fp_reg_size = fp_reg_list.TotalSizeInBytes(); - - // For now we only check that the size of the frame is large enough to hold spills and method - // reference. - DCHECK_GE(frame_size, core_reg_size + fp_reg_size + kArm64PointerSize); - DCHECK_ALIGNED(frame_size, kStackAlignment); - - DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); - - cfi_.RememberState(); - - // Restore callee-saves. - UnspillRegisters(core_reg_list, frame_size - core_reg_size); - UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); - - // Decrease frame size to start of callee saved regs. - DecreaseFrameSize(frame_size); - - // Pop callee saved and return to LR. - ___ Ret(); - - // The CFI should be restored for any code that follows the exit block. - cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - -void Arm64Assembler::PoisonHeapReference(vixl::Register reg) { +void Arm64Assembler::PoisonHeapReference(Register reg) { DCHECK(reg.IsW()); // reg = -reg. - ___ Neg(reg, vixl::Operand(reg)); + ___ Neg(reg, Operand(reg)); } -void Arm64Assembler::UnpoisonHeapReference(vixl::Register reg) { +void Arm64Assembler::UnpoisonHeapReference(Register reg) { DCHECK(reg.IsW()); // reg = -reg. 
- ___ Neg(reg, vixl::Operand(reg)); + ___ Neg(reg, Operand(reg)); +} + +void Arm64Assembler::MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } } -void Arm64Assembler::MaybeUnpoisonHeapReference(vixl::Register reg) { +void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { UnpoisonHeapReference(reg); } diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index c4e5de7a67..66a7fed804 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -23,24 +23,21 @@ #include "base/arena_containers.h" #include "base/logging.h" -#include "constants_arm64.h" #include "utils/arm64/managed_register_arm64.h" #include "utils/assembler.h" #include "offsets.h" -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunknown-pragmas" #pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wmissing-noreturn" -#include "vixl/a64/macro-assembler-a64.h" -#include "vixl/a64/disasm-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { namespace arm64 { -#define MEM_OP(...) vixl::MemOperand(__VA_ARGS__) +#define MEM_OP(...) vixl::aarch64::MemOperand(__VA_ARGS__) enum LoadOperandType { kLoadSignedByte, @@ -62,38 +59,13 @@ enum StoreOperandType { kStoreDWord }; -class Arm64Exception { - private: - Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) { - } - - vixl::Label* Entry() { return &exception_entry_; } - - // Register used for passing Thread::Current()->exception_ . - const Arm64ManagedRegister scratch_; - - // Stack adjust for ExceptionPool. - const size_t stack_adjust_; - - vixl::Label exception_entry_; - - friend class Arm64Assembler; - DISALLOW_COPY_AND_ASSIGN(Arm64Exception); -}; - class Arm64Assembler FINAL : public Assembler { public: - // We indicate the size of the initial code generation buffer to the VIXL - // assembler. From there we it will automatically manage the buffer. - explicit Arm64Assembler(ArenaAllocator* arena) - : Assembler(arena), - exception_blocks_(arena->Adapter(kArenaAllocAssembler)), - vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {} + explicit Arm64Assembler(ArenaAllocator* arena) : Assembler(arena) {} - virtual ~Arm64Assembler() { - delete vixl_masm_; - } + virtual ~Arm64Assembler() {} + + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } // Finalize the code. void FinalizeCode() OVERRIDE; @@ -105,115 +77,26 @@ class Arm64Assembler FINAL : public Assembler { // Copy instructions out of assembly buffer into the given region of memory. void FinalizeInstructions(const MemoryRegion& region); - void SpillRegisters(vixl::CPURegList registers, int offset); - void UnspillRegisters(vixl::CPURegList registers, int offset); - - // Emit code that will create an activation on the stack. - void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack. 
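Heap-reference poisoning above is just two's-complement negation of the 32-bit reference (the "reg = -reg" comments), so poisoning and unpoisoning are the same NEG instruction, and the Maybe* wrappers fall away when kPoisonHeapReferences is false. In plain C++:

#include <cstdint>

constexpr bool kPoisonHeapReferences = false;   // build-time switch, as in ART

constexpr uint32_t Poison(uint32_t ref)   { return 0u - ref; }  // NEG reg, reg
constexpr uint32_t Unpoison(uint32_t ref) { return 0u - ref; }  // NEG reg, reg

constexpr uint32_t MaybePoison(uint32_t ref) {
  return kPoisonHeapReferences ? Poison(ref) : ref;
}

static_assert(Unpoison(Poison(0x12345678u)) == 0x12345678u, "negation round-trips");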
- void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines. - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch) - OVERRIDE; - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines. - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); - // Copying routines. - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, - ManagedRegister scratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; - - // Sign extension. - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. 
- void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst. - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE; + void SpillRegisters(vixl::aarch64::CPURegList registers, int offset); + void UnspillRegisters(vixl::aarch64::CPURegList registers, int offset); // Jump to address (not setting link register) void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch); - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - // // Heap poisoning. // // Poison a heap reference contained in `reg`. - void PoisonHeapReference(vixl::Register reg); + void PoisonHeapReference(vixl::aarch64::Register reg); // Unpoison a heap reference contained in `reg`. - void UnpoisonHeapReference(vixl::Register reg); + void UnpoisonHeapReference(vixl::aarch64::Register reg); + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(vixl::aarch64::Register reg); // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
- void MaybeUnpoisonHeapReference(vixl::Register reg); + void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg); void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE { UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64"; @@ -222,60 +105,37 @@ class Arm64Assembler FINAL : public Assembler { UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64"; } - private: - static vixl::Register reg_x(int code) { + static vixl::aarch64::Register reg_x(int code) { CHECK(code < kNumberOfXRegisters) << code; if (code == SP) { - return vixl::sp; + return vixl::aarch64::sp; } else if (code == XZR) { - return vixl::xzr; + return vixl::aarch64::xzr; } - return vixl::Register::XRegFromCode(code); + return vixl::aarch64::Register::GetXRegFromCode(code); } - static vixl::Register reg_w(int code) { + static vixl::aarch64::Register reg_w(int code) { CHECK(code < kNumberOfWRegisters) << code; if (code == WSP) { - return vixl::wsp; + return vixl::aarch64::wsp; } else if (code == WZR) { - return vixl::wzr; + return vixl::aarch64::wzr; } - return vixl::Register::WRegFromCode(code); + return vixl::aarch64::Register::GetWRegFromCode(code); } - static vixl::FPRegister reg_d(int code) { - return vixl::FPRegister::DRegFromCode(code); + static vixl::aarch64::FPRegister reg_d(int code) { + return vixl::aarch64::FPRegister::GetDRegFromCode(code); } - static vixl::FPRegister reg_s(int code) { - return vixl::FPRegister::SRegFromCode(code); + static vixl::aarch64::FPRegister reg_s(int code) { + return vixl::aarch64::FPRegister::GetSRegFromCode(code); } - // Emits Exception block. - void EmitExceptionPoll(Arm64Exception *exception); - - void StoreWToOffset(StoreOperandType type, WRegister source, - XRegister base, int32_t offset); - void StoreToOffset(XRegister source, XRegister base, int32_t offset); - void StoreSToOffset(SRegister source, XRegister base, int32_t offset); - void StoreDToOffset(DRegister source, XRegister base, int32_t offset); - - void LoadImmediate(XRegister dest, int32_t value, vixl::Condition cond = vixl::al); - void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size); - void LoadWFromOffset(LoadOperandType type, WRegister dest, - XRegister base, int32_t offset); - void LoadFromOffset(XRegister dest, XRegister base, int32_t offset); - void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset); - void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset); - void AddConstant(XRegister rd, int32_t value, vixl::Condition cond = vixl::al); - void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al); - - // List of exception blocks to generate at the end of the code cache. - ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; - - public: - // Vixl assembler. - vixl::MacroAssembler* const vixl_masm_; + private: + // VIXL assembler. + vixl::aarch64::MacroAssembler vixl_masm_; // Used for testing. friend class Arm64ManagedRegister_VixlRegisters_Test; diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h deleted file mode 100644 index 01e8be9de6..0000000000 --- a/compiler/utils/arm64/constants_arm64.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ -#define ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ - -#include <stdint.h> -#include <iosfwd> -#include "arch/arm64/registers_arm64.h" -#include "base/casts.h" -#include "base/logging.h" -#include "globals.h" - -// TODO: Extend this file by adding missing functionality. - -namespace art { -namespace arm64 { - -constexpr size_t kArm64BaseBufferSize = 4096; - -} // namespace arm64 -} // namespace art - -#endif // ART_COMPILER_UTILS_ARM64_CONSTANTS_ARM64_H_ diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc new file mode 100644 index 0000000000..9cd6884cbe --- /dev/null +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -0,0 +1,789 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni_macro_assembler_arm64.h" + +#include "base/logging.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "managed_register_arm64.h" +#include "offsets.h" +#include "thread.h" + +using namespace vixl::aarch64; // NOLINT(build/namespaces) + +namespace art { +namespace arm64 { + +#ifdef ___ +#error "ARM64 Assembler macro already defined." +#else +#define ___ asm_.GetVIXLAssembler()-> +#endif + +#define reg_x(X) Arm64Assembler::reg_x(X) +#define reg_w(W) Arm64Assembler::reg_w(W) +#define reg_d(D) Arm64Assembler::reg_d(D) +#define reg_s(S) Arm64Assembler::reg_s(S) + +Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() { +} + +void Arm64JNIMacroAssembler::FinalizeCode() { + for (const std::unique_ptr<Arm64Exception>& exception : exception_blocks_) { + EmitExceptionPoll(exception.get()); + } + ___ FinalizeCode(); +} + +void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR)); +} + +void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) { + StoreToOffset(TR, SP, offset.Int32Value()); +} + +// See Arm64 PCS Section 5.2.2.1. +void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); +} + +// See Arm64 PCS Section 5.2.2.1. 
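// Editor's annotation (not part of the patch): a minimal sketch of how these two helpers pair
// up, assuming a 32-byte adjustment:
//   IncreaseFrameSize(32);   // conceptually "sub sp, sp, #32"; cfi().AdjustCFAOffset(32)
//   ...                      // code that needs the extra stack space
//   DecreaseFrameSize(32);   // conceptually "add sp, sp, #32"; cfi().AdjustCFAOffset(-32)
// Both keep the DWARF CFA offset in sync with the SP adjustment.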
+void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + AddConstant(SP, adjust); + cfi().AdjustCFAOffset(-adjust); +} + +void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) { + AddConstant(rd, rd, value, cond); +} + +void Arm64JNIMacroAssembler::AddConstant(XRegister rd, + XRegister rn, + int32_t value, + Condition cond) { + if ((cond == al) || (cond == nv)) { + // VIXL macro-assembler handles all variants. + ___ Add(reg_x(rd), reg_x(rn), value); + } else { + // temp = rd + value + // rd = cond ? temp : rn + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(rd), reg_x(rn)); + Register temp = temps.AcquireX(); + ___ Add(temp, reg_x(rn), value); + ___ Csel(reg_x(rd), temp, reg_x(rd), cond); + } +} + +void Arm64JNIMacroAssembler::StoreWToOffset(StoreOperandType type, + WRegister source, + XRegister base, + int32_t offset) { + switch (type) { + case kStoreByte: + ___ Strb(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + case kStoreHalfword: + ___ Strh(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + case kStoreWord: + ___ Str(reg_w(source), MEM_OP(reg_x(base), offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +void Arm64JNIMacroAssembler::StoreToOffset(XRegister source, XRegister base, int32_t offset) { + CHECK_NE(source, SP); + ___ Str(reg_x(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::StoreSToOffset(SRegister source, XRegister base, int32_t offset) { + ___ Str(reg_s(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::StoreDToOffset(DRegister source, XRegister base, int32_t offset) { + ___ Str(reg_d(source), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { + Arm64ManagedRegister src = m_src.AsArm64(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsWRegister()) { + CHECK_EQ(4u, size); + StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value()); + } else if (src.IsXRegister()) { + CHECK_EQ(8u, size); + StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); + } else if (src.IsSRegister()) { + StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value()); + } else { + CHECK(src.IsDRegister()) << src; + StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value()); + } +} + +void Arm64JNIMacroAssembler::StoreRef(FrameOffset offs, ManagedRegister m_src) { + Arm64ManagedRegister src = m_src.AsArm64(); + CHECK(src.IsXRegister()) << src; + StoreWToOffset(kStoreWord, src.AsOverlappingWRegister(), SP, + offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { + Arm64ManagedRegister src = m_src.AsArm64(); + CHECK(src.IsXRegister()) << src; + StoreToOffset(src.AsXRegister(), SP, offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, + uint32_t imm, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadImmediate(scratch.AsXRegister(), imm); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, + offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, + FrameOffset fr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value()); + 
StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + Register temp = temps.AcquireX(); + ___ Mov(temp, reg_x(SP)); + ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value())); +} + +void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off, + ManagedRegister m_source, + FrameOffset in_off, + ManagedRegister m_scratch) { + Arm64ManagedRegister source = m_source.AsArm64(); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value()); + LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8); +} + +// Load routines. +void Arm64JNIMacroAssembler::LoadImmediate(XRegister dest, int32_t value, Condition cond) { + if ((cond == al) || (cond == nv)) { + ___ Mov(reg_x(dest), value); + } else { + // temp = value + // rd = cond ? temp : rd + if (value != 0) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(dest)); + Register temp = temps.AcquireX(); + ___ Mov(temp, value); + ___ Csel(reg_x(dest), temp, reg_x(dest), cond); + } else { + ___ Csel(reg_x(dest), reg_x(XZR), reg_x(dest), cond); + } + } +} + +void Arm64JNIMacroAssembler::LoadWFromOffset(LoadOperandType type, + WRegister dest, + XRegister base, + int32_t offset) { + switch (type) { + case kLoadSignedByte: + ___ Ldrsb(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadSignedHalfword: + ___ Ldrsh(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadUnsignedByte: + ___ Ldrb(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadUnsignedHalfword: + ___ Ldrh(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + case kLoadWord: + ___ Ldr(reg_w(dest), MEM_OP(reg_x(base), offset)); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } +} + +// Note: We can extend this member by adding load type info - see +// sign extended A64 load variants. 
+void Arm64JNIMacroAssembler::LoadFromOffset(XRegister dest, XRegister base, int32_t offset) { + CHECK_NE(dest, SP); + ___ Ldr(reg_x(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::LoadSFromOffset(SRegister dest, XRegister base, int32_t offset) { + ___ Ldr(reg_s(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::LoadDFromOffset(DRegister dest, XRegister base, int32_t offset) { + ___ Ldr(reg_d(dest), MEM_OP(reg_x(base), offset)); +} + +void Arm64JNIMacroAssembler::Load(Arm64ManagedRegister dest, + XRegister base, + int32_t offset, + size_t size) { + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size) << dest; + } else if (dest.IsWRegister()) { + CHECK_EQ(4u, size) << dest; + ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset)); + } else if (dest.IsXRegister()) { + CHECK_NE(dest.AsXRegister(), SP) << dest; + + if (size == 1u) { + ___ Ldrb(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset)); + } else if (size == 4u) { + ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset)); + } else { + CHECK_EQ(8u, size) << dest; + ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset)); + } + } else if (dest.IsSRegister()) { + ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset)); + } else { + CHECK(dest.IsDRegister()) << dest; + ___ Ldr(reg_d(dest.AsDRegister()), MEM_OP(reg_x(base), offset)); + } +} + +void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { + return Load(m_dst.AsArm64(), SP, src.Int32Value(), size); +} + +void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, + ThreadOffset64 src, + size_t size) { + return Load(m_dst.AsArm64(), TR, src.Int32Value(), size); +} + +void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + CHECK(dst.IsXRegister()) << dst; + LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), SP, offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::LoadRef(ManagedRegister m_dst, + ManagedRegister m_base, + MemberOffset offs, + bool unpoison_reference) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + Arm64ManagedRegister base = m_base.AsArm64(); + CHECK(dst.IsXRegister() && base.IsXRegister()); + LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(), + offs.Int32Value()); + if (unpoison_reference) { + WRegister ref_reg = dst.AsOverlappingWRegister(); + asm_.MaybeUnpoisonHeapReference(reg_w(ref_reg)); + } +} + +void Arm64JNIMacroAssembler::LoadRawPtr(ManagedRegister m_dst, + ManagedRegister m_base, + Offset offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + Arm64ManagedRegister base = m_base.AsArm64(); + CHECK(dst.IsXRegister() && base.IsXRegister()); + // Remove dst and base form the temp list - higher level API uses IP1, IP0. + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(dst.AsXRegister()), reg_x(base.AsXRegister())); + ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value())); +} + +void Arm64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + CHECK(dst.IsXRegister()) << dst; + LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value()); +} + +// Copying routines. 
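// Editor's annotation (not part of the patch): the copy helpers below route both 32-bit and
// 64-bit copies through a single X scratch register; the 32-bit case uses its overlapping W
// view. A sketch, assuming scratch is an X register and both offsets are frame slots:
//   // size == 4
//   LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
//   StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
//   // size == 8
//   LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
//   StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());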
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) { + Arm64ManagedRegister dst = m_dst.AsArm64(); + Arm64ManagedRegister src = m_src.AsArm64(); + if (!dst.Equals(src)) { + if (dst.IsXRegister()) { + if (size == 4) { + CHECK(src.IsWRegister()); + ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister())); + } else { + if (src.IsXRegister()) { + ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister())); + } else { + ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister())); + } + } + } else if (dst.IsWRegister()) { + CHECK(src.IsWRegister()) << src; + ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister())); + } else if (dst.IsSRegister()) { + CHECK(src.IsSRegister()) << src; + ___ Fmov(reg_s(dst.AsSRegister()), reg_s(src.AsSRegister())); + } else { + CHECK(dst.IsDRegister()) << dst; + CHECK(src.IsDRegister()) << src; + ___ Fmov(reg_d(dst.AsDRegister()), reg_d(src.AsDRegister())); + } + } +} + +void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 tr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs, + FrameOffset fr_offs, + ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value()); + StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value()); +} + +void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), + SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), + SP, dest.Int32Value()); +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister base = src_base.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), base.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), src_offset.Int32Value()); + StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value()); + } else { + 
UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest_base, + Offset dest_offs, + FrameOffset src, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister base = m_dest_base.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), base.AsXRegister(), + dest_offs.Int32Value()); + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value()); + StoreToOffset(scratch.AsXRegister(), base.AsXRegister(), dest_offs.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, + FrameOffset /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*mscratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; +} + +void Arm64JNIMacroAssembler::Copy(ManagedRegister m_dest, + Offset dest_offset, + ManagedRegister m_src, + Offset src_offset, + ManagedRegister m_scratch, + size_t size) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + Arm64ManagedRegister src = m_src.AsArm64(); + Arm64ManagedRegister dest = m_dest.AsArm64(); + CHECK(dest.IsXRegister()) << dest; + CHECK(src.IsXRegister()) << src; + CHECK(scratch.IsXRegister() || scratch.IsWRegister()) << scratch; + CHECK(size == 4 || size == 8) << size; + if (size == 4) { + if (scratch.IsWRegister()) { + LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsXRegister(), + dest_offset.Int32Value()); + } else { + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), src.AsXRegister(), + src_offset.Int32Value()); + StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), dest.AsXRegister(), + dest_offset.Int32Value()); + } + } else if (size == 8) { + LoadFromOffset(scratch.AsXRegister(), src.AsXRegister(), src_offset.Int32Value()); + StoreToOffset(scratch.AsXRegister(), dest.AsXRegister(), dest_offset.Int32Value()); + } else { + UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8"; + } +} + +void Arm64JNIMacroAssembler::Copy(FrameOffset /*dst*/, + Offset /*dest_offset*/, + FrameOffset /*src*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL) << "Unimplemented Copy() variant"; +} + +void Arm64JNIMacroAssembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) { + // TODO: Should we check that m_scratch is IP? - see arm. 
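// Editor's annotation (not part of the patch): the scratch register is unused here; the
// barrier below expands to a single "dmb ish" (inner-shareable domain, all access types).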
+ ___ Dmb(InnerShareable, BarrierAll); +} + +void Arm64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) { + Arm64ManagedRegister reg = mreg.AsArm64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsWRegister()) << reg; + if (size == 1) { + ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } else { + ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } +} + +void Arm64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) { + Arm64ManagedRegister reg = mreg.AsArm64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsWRegister()) << reg; + if (size == 1) { + ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } else { + ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister())); + } +} + +void Arm64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void Arm64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references. +} + +void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) { + Arm64ManagedRegister base = m_base.AsArm64(); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(base.IsXRegister()) << base; + CHECK(scratch.IsXRegister()) << scratch; + LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + ___ Blr(reg_x(scratch.AsXRegister())); +} + +void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + // Call *(*(SP + base) + offset) + LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value()); + LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value()); + ___ Blr(reg_x(scratch.AsXRegister())); +} + +void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED, + ManagedRegister scratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant"; +} + +void Arm64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister m_out_reg, + FrameOffset handle_scope_offs, + ManagedRegister m_in_reg, + bool null_allowed) { + Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); + Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); + // For now we only hold stale handle scope entries in x registers. + CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg; + CHECK(out_reg.IsXRegister()) << out_reg; + if (null_allowed) { + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) + if (in_reg.IsNoRegister()) { + LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP, + handle_scope_offs.Int32Value()); + in_reg = out_reg; + } + ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0); + if (!out_reg.Equals(in_reg)) { + LoadImmediate(out_reg.AsXRegister(), 0, eq); + } + AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), ne); + } else { + AddConstant(out_reg.AsXRegister(), SP, handle_scope_offs.Int32Value(), al); + } +} + +void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister m_scratch, + bool null_allowed) { + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + CHECK(scratch.IsXRegister()) << scratch; + if (null_allowed) { + LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, + handle_scope_offset.Int32Value()); + // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is + // the address in the handle scope holding the reference. + // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) + ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0); + // Move this logic in add constants with flags. + AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne); + } else { + AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al); + } + StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value()); +} + +void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg, + ManagedRegister m_in_reg) { + Arm64ManagedRegister out_reg = m_out_reg.AsArm64(); + Arm64ManagedRegister in_reg = m_in_reg.AsArm64(); + CHECK(out_reg.IsXRegister()) << out_reg; + CHECK(in_reg.IsXRegister()) << in_reg; + vixl::aarch64::Label exit; + if (!out_reg.Equals(in_reg)) { + // FIXME: Who sets the flags here? 
+ LoadImmediate(out_reg.AsXRegister(), 0, eq); + } + ___ Cbz(reg_x(in_reg.AsXRegister()), &exit); + LoadFromOffset(out_reg.AsXRegister(), in_reg.AsXRegister(), 0); + ___ Bind(&exit); +} + +void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { + CHECK_ALIGNED(stack_adjust, kStackAlignment); + Arm64ManagedRegister scratch = m_scratch.AsArm64(); + exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust)); + LoadFromOffset(scratch.AsXRegister(), + TR, + Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); + ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry()); +} + +std::unique_ptr<JNIMacroLabel> Arm64JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new Arm64JNIMacroLabel()); +} + +void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + ___ B(Arm64JNIMacroLabel::Cast(label)->AsArm64()); +} + +void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label, + JNIMacroUnaryCondition condition, + ManagedRegister test) { + CHECK(label != nullptr); + + switch (condition) { + case JNIMacroUnaryCondition::kZero: + ___ Cbz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64()); + break; + case JNIMacroUnaryCondition::kNotZero: + ___ Cbnz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64()); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition); + UNREACHABLE(); + } +} + +void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64()); +} + +void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) { + UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); + temps.Exclude(reg_x(exception->scratch_.AsXRegister())); + Register temp = temps.AcquireX(); + + // Bind exception poll entry. + ___ Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSize(exception->stack_adjust_); + } + // Pass exception object as argument. + // Don't care about preserving X0 as this won't return. + ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister())); + ___ Ldr(temp, + MEM_OP(reg_x(TR), + QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value())); + + ___ Blr(temp); + // Call should never return. + ___ Brk(); +} + +void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) { + // Setup VIXL CPURegList for callee-saves. + CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); + } + } + size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); + + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); + IncreaseFrameSize(frame_size); + + // Save callee-saves. 
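// Editor's annotation (not part of the patch): sketch of the layout produced below, relative
// to the post-adjustment SP:
//   [SP + 0]                       ArtMethod* (stored from X0)
//   [SP + frame_size - core - fp]  FP callee-saves (fp_reg_list)
//   [SP + frame_size - core]       core callee-saves (core_reg_list, includes TR)
//   [SP + frame_size + 8, ...]     entry spills written above the new frame
// where "core" and "fp" stand for core_reg_size and fp_reg_size computed above.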
+ asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size); + asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); + + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); + + // Write ArtMethod* + DCHECK(X0 == method_reg.AsArm64().AsXRegister()); + StoreToOffset(X0, SP, 0); + + // Write out entry spills + int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize); + for (size_t i = 0; i < entry_spills.size(); ++i) { + Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); + if (reg.IsNoRegister()) { + // only increment stack offset. + ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsXRegister()) { + StoreToOffset(reg.AsXRegister(), SP, offset); + offset += 8; + } else if (reg.IsWRegister()) { + StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset); + offset += 4; + } else if (reg.IsDRegister()) { + StoreDToOffset(reg.AsDRegister(), SP, offset); + offset += 8; + } else if (reg.IsSRegister()) { + StoreSToOffset(reg.AsSRegister(), SP, offset); + offset += 4; + } + } +} + +void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs) { + // Setup VIXL CPURegList for callee-saves. + CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0); + CPURegList fp_reg_list(CPURegister::kFPRegister, kDRegSize, 0); + for (auto r : callee_save_regs) { + Arm64ManagedRegister reg = r.AsArm64(); + if (reg.IsXRegister()) { + core_reg_list.Combine(reg_x(reg.AsXRegister()).GetCode()); + } else { + DCHECK(reg.IsDRegister()); + fp_reg_list.Combine(reg_d(reg.AsDRegister()).GetCode()); + } + } + size_t core_reg_size = core_reg_list.GetTotalSizeInBytes(); + size_t fp_reg_size = fp_reg_list.GetTotalSizeInBytes(); + + // For now we only check that the size of the frame is large enough to hold spills and method + // reference. + DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize)); + DCHECK_ALIGNED(frame_size, kStackAlignment); + + DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR))); + + cfi().RememberState(); + + // Restore callee-saves. + asm_.UnspillRegisters(core_reg_list, frame_size - core_reg_size); + asm_.UnspillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size); + + // Decrease frame size to start of callee saved regs. + DecreaseFrameSize(frame_size); + + // Pop callee saved and return to LR. + ___ Ret(); + + // The CFI should be restored for any code that follows the exit block. + cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +#undef ___ + +} // namespace arm64 +} // namespace art diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h new file mode 100644 index 0000000000..264e99adab --- /dev/null +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h @@ -0,0 +1,247 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ +#define ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ + +#include <stdint.h> +#include <memory> +#include <vector> + +#include "assembler_arm64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/logging.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" +#include "offsets.h" + +// TODO(VIXL): Make VIXL compile with -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/macro-assembler-aarch64.h" +#pragma GCC diagnostic pop + +namespace art { +namespace arm64 { + +class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> { + public: + explicit Arm64JNIMacroAssembler(ArenaAllocator* arena) + : JNIMacroAssemblerFwd(arena), + exception_blocks_(arena->Adapter(kArenaAllocAssembler)) {} + + ~Arm64JNIMacroAssembler(); + + // Finalize the code. + void FinalizeCode() OVERRIDE; + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack. + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + void StoreSpanning(FrameOffset dest, + ManagedRegister src, + FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines. + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; + + // Copying routines. 
+ void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, + ManagedRegister scratch) OVERRIDE; + void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current(). + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister scratch, + bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst. + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset]. + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) OVERRIDE; + // Emit a conditional jump to the label by applying a unary condition test to the register. 
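// Editor's annotation (not part of the patch): a hypothetical caller sketch; "jni_asm",
// "slow_path" and "reg" are illustrative names only:
//   std::unique_ptr<JNIMacroLabel> slow_path = jni_asm->CreateLabel();
//   jni_asm->Jump(slow_path.get(), JNIMacroUnaryCondition::kZero, reg);  // taken when reg == 0
//   /* fast path */
//   jni_asm->Bind(slow_path.get());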
+ void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) OVERRIDE; + + private: + class Arm64Exception { + public: + Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + vixl::aarch64::Label* Entry() { return &exception_entry_; } + + // Register used for passing Thread::Current()->exception_ . + const Arm64ManagedRegister scratch_; + + // Stack adjust for ExceptionPool. + const size_t stack_adjust_; + + vixl::aarch64::Label exception_entry_; + + private: + DISALLOW_COPY_AND_ASSIGN(Arm64Exception); + }; + + // Emits Exception block. + void EmitExceptionPoll(Arm64Exception *exception); + + void StoreWToOffset(StoreOperandType type, + WRegister source, + XRegister base, + int32_t offset); + void StoreToOffset(XRegister source, XRegister base, int32_t offset); + void StoreSToOffset(SRegister source, XRegister base, int32_t offset); + void StoreDToOffset(DRegister source, XRegister base, int32_t offset); + + void LoadImmediate(XRegister dest, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + void Load(Arm64ManagedRegister dst, XRegister src, int32_t src_offset, size_t size); + void LoadWFromOffset(LoadOperandType type, + WRegister dest, + XRegister base, + int32_t offset); + void LoadFromOffset(XRegister dest, XRegister base, int32_t offset); + void LoadSFromOffset(SRegister dest, XRegister base, int32_t offset); + void LoadDFromOffset(DRegister dest, XRegister base, int32_t offset); + void AddConstant(XRegister rd, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + void AddConstant(XRegister rd, + XRegister rn, + int32_t value, + vixl::aarch64::Condition cond = vixl::aarch64::al); + + // List of exception blocks to generate at the end of the code cache. 
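// Editor's annotation (not part of the patch): entries are appended by ExceptionPoll(), which
// loads Thread::Current()->exception_ and emits a cbnz to the block's entry label;
// FinalizeCode() then materializes each block via EmitExceptionPoll() after the main code.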
+ ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; +}; + +class Arm64JNIMacroLabel FINAL + : public JNIMacroLabelCommon<Arm64JNIMacroLabel, + vixl::aarch64::Label, + kArm64> { + public: + vixl::aarch64::Label* AsArm64() { + return AsPlatformLabel(); + } +}; + +} // namespace arm64 +} // namespace art + +#endif // ART_COMPILER_UTILS_ARM64_JNI_MACRO_ASSEMBLER_ARM64_H_ diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index 46be1c528c..7378a0a081 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_ #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_ +#include "arch/arm64/registers_arm64.h" #include "base/logging.h" -#include "constants_arm64.h" #include "debug/dwarf/register.h" #include "utils/managed_register.h" @@ -56,80 +56,80 @@ const int kNumberOfRegIds = kNumberOfXRegIds + kNumberOfWRegIds + class Arm64ManagedRegister : public ManagedRegister { public: - XRegister AsXRegister() const { + constexpr XRegister AsXRegister() const { CHECK(IsXRegister()); return static_cast<XRegister>(id_); } - WRegister AsWRegister() const { + constexpr WRegister AsWRegister() const { CHECK(IsWRegister()); return static_cast<WRegister>(id_ - kNumberOfXRegIds); } - DRegister AsDRegister() const { + constexpr DRegister AsDRegister() const { CHECK(IsDRegister()); return static_cast<DRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds); } - SRegister AsSRegister() const { + constexpr SRegister AsSRegister() const { CHECK(IsSRegister()); return static_cast<SRegister>(id_ - kNumberOfXRegIds - kNumberOfWRegIds - kNumberOfDRegIds); } - WRegister AsOverlappingWRegister() const { + constexpr WRegister AsOverlappingWRegister() const { CHECK(IsValidManagedRegister()); if (IsZeroRegister()) return WZR; return static_cast<WRegister>(AsXRegister()); } - XRegister AsOverlappingXRegister() const { + constexpr XRegister AsOverlappingXRegister() const { CHECK(IsValidManagedRegister()); return static_cast<XRegister>(AsWRegister()); } - SRegister AsOverlappingSRegister() const { + constexpr SRegister AsOverlappingSRegister() const { CHECK(IsValidManagedRegister()); return static_cast<SRegister>(AsDRegister()); } - DRegister AsOverlappingDRegister() const { + constexpr DRegister AsOverlappingDRegister() const { CHECK(IsValidManagedRegister()); return static_cast<DRegister>(AsSRegister()); } - bool IsXRegister() const { + constexpr bool IsXRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfXRegIds); } - bool IsWRegister() const { + constexpr bool IsWRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfXRegIds; return (0 <= test) && (test < kNumberOfWRegIds); } - bool IsDRegister() const { + constexpr bool IsDRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds); return (0 <= test) && (test < kNumberOfDRegIds); } - bool IsSRegister() const { + constexpr bool IsSRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds); return (0 <= test) && (test < kNumberOfSRegIds); } - bool IsGPRegister() const { + constexpr bool IsGPRegister() const { return IsXRegister() || IsWRegister(); } - bool IsFPRegister() const { + constexpr bool IsFPRegister() const { return IsDRegister() || IsSRegister(); } - bool 
IsSameType(Arm64ManagedRegister test) const { + constexpr bool IsSameType(Arm64ManagedRegister test) const { CHECK(IsValidManagedRegister() && test.IsValidManagedRegister()); return (IsXRegister() && test.IsXRegister()) || @@ -145,53 +145,53 @@ class Arm64ManagedRegister : public ManagedRegister { void Print(std::ostream& os) const; - static Arm64ManagedRegister FromXRegister(XRegister r) { + static constexpr Arm64ManagedRegister FromXRegister(XRegister r) { CHECK_NE(r, kNoRegister); return FromRegId(r); } - static Arm64ManagedRegister FromWRegister(WRegister r) { + static constexpr Arm64ManagedRegister FromWRegister(WRegister r) { CHECK_NE(r, kNoWRegister); return FromRegId(r + kNumberOfXRegIds); } - static Arm64ManagedRegister FromDRegister(DRegister r) { + static constexpr Arm64ManagedRegister FromDRegister(DRegister r) { CHECK_NE(r, kNoDRegister); return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds)); } - static Arm64ManagedRegister FromSRegister(SRegister r) { + static constexpr Arm64ManagedRegister FromSRegister(SRegister r) { CHECK_NE(r, kNoSRegister); return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds + kNumberOfDRegIds)); } // Returns the X register overlapping W register r. - static Arm64ManagedRegister FromWRegisterX(WRegister r) { + static constexpr Arm64ManagedRegister FromWRegisterX(WRegister r) { CHECK_NE(r, kNoWRegister); return FromRegId(r); } // Return the D register overlapping S register r. - static Arm64ManagedRegister FromSRegisterD(SRegister r) { + static constexpr Arm64ManagedRegister FromSRegisterD(SRegister r) { CHECK_NE(r, kNoSRegister); return FromRegId(r + (kNumberOfXRegIds + kNumberOfWRegIds)); } private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } - bool IsStackPointer() const { + constexpr bool IsStackPointer() const { return IsXRegister() && (id_ == SP); } - bool IsZeroRegister() const { + constexpr bool IsZeroRegister() const { return IsXRegister() && (id_ == XZR); } - int RegId() const { + constexpr int RegId() const { CHECK(!IsNoRegister()); return id_; } @@ -202,9 +202,9 @@ class Arm64ManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr Arm64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static Arm64ManagedRegister FromRegId(int reg_id) { + static constexpr Arm64ManagedRegister FromRegId(int reg_id) { Arm64ManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -215,7 +215,7 @@ std::ostream& operator<<(std::ostream& os, const Arm64ManagedRegister& reg); } // namespace arm64 -inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const { +constexpr inline arm64::Arm64ManagedRegister ManagedRegister::AsArm64() const { arm64::Arm64ManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg; diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc index e27115d78a..79076b8ccc 100644 --- a/compiler/utils/arm64/managed_register_arm64_test.cc +++ b/compiler/utils/arm64/managed_register_arm64_test.cc @@ -591,149 +591,149 @@ TEST(Arm64ManagedRegister, Overlaps) { TEST(Arm64ManagedRegister, VixlRegisters) { // X Registers. 
- EXPECT_TRUE(vixl::x0.Is(Arm64Assembler::reg_x(X0))); - EXPECT_TRUE(vixl::x1.Is(Arm64Assembler::reg_x(X1))); - EXPECT_TRUE(vixl::x2.Is(Arm64Assembler::reg_x(X2))); - EXPECT_TRUE(vixl::x3.Is(Arm64Assembler::reg_x(X3))); - EXPECT_TRUE(vixl::x4.Is(Arm64Assembler::reg_x(X4))); - EXPECT_TRUE(vixl::x5.Is(Arm64Assembler::reg_x(X5))); - EXPECT_TRUE(vixl::x6.Is(Arm64Assembler::reg_x(X6))); - EXPECT_TRUE(vixl::x7.Is(Arm64Assembler::reg_x(X7))); - EXPECT_TRUE(vixl::x8.Is(Arm64Assembler::reg_x(X8))); - EXPECT_TRUE(vixl::x9.Is(Arm64Assembler::reg_x(X9))); - EXPECT_TRUE(vixl::x10.Is(Arm64Assembler::reg_x(X10))); - EXPECT_TRUE(vixl::x11.Is(Arm64Assembler::reg_x(X11))); - EXPECT_TRUE(vixl::x12.Is(Arm64Assembler::reg_x(X12))); - EXPECT_TRUE(vixl::x13.Is(Arm64Assembler::reg_x(X13))); - EXPECT_TRUE(vixl::x14.Is(Arm64Assembler::reg_x(X14))); - EXPECT_TRUE(vixl::x15.Is(Arm64Assembler::reg_x(X15))); - EXPECT_TRUE(vixl::x16.Is(Arm64Assembler::reg_x(X16))); - EXPECT_TRUE(vixl::x17.Is(Arm64Assembler::reg_x(X17))); - EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(X18))); - EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(X19))); - EXPECT_TRUE(vixl::x20.Is(Arm64Assembler::reg_x(X20))); - EXPECT_TRUE(vixl::x21.Is(Arm64Assembler::reg_x(X21))); - EXPECT_TRUE(vixl::x22.Is(Arm64Assembler::reg_x(X22))); - EXPECT_TRUE(vixl::x23.Is(Arm64Assembler::reg_x(X23))); - EXPECT_TRUE(vixl::x24.Is(Arm64Assembler::reg_x(X24))); - EXPECT_TRUE(vixl::x25.Is(Arm64Assembler::reg_x(X25))); - EXPECT_TRUE(vixl::x26.Is(Arm64Assembler::reg_x(X26))); - EXPECT_TRUE(vixl::x27.Is(Arm64Assembler::reg_x(X27))); - EXPECT_TRUE(vixl::x28.Is(Arm64Assembler::reg_x(X28))); - EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29))); - EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30))); - - EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR))); - EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0))); - EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1))); - EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP))); - EXPECT_TRUE(vixl::lr.Is(Arm64Assembler::reg_x(LR))); - EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(SP))); - EXPECT_TRUE(vixl::xzr.Is(Arm64Assembler::reg_x(XZR))); + EXPECT_TRUE(vixl::aarch64::x0.Is(Arm64Assembler::reg_x(X0))); + EXPECT_TRUE(vixl::aarch64::x1.Is(Arm64Assembler::reg_x(X1))); + EXPECT_TRUE(vixl::aarch64::x2.Is(Arm64Assembler::reg_x(X2))); + EXPECT_TRUE(vixl::aarch64::x3.Is(Arm64Assembler::reg_x(X3))); + EXPECT_TRUE(vixl::aarch64::x4.Is(Arm64Assembler::reg_x(X4))); + EXPECT_TRUE(vixl::aarch64::x5.Is(Arm64Assembler::reg_x(X5))); + EXPECT_TRUE(vixl::aarch64::x6.Is(Arm64Assembler::reg_x(X6))); + EXPECT_TRUE(vixl::aarch64::x7.Is(Arm64Assembler::reg_x(X7))); + EXPECT_TRUE(vixl::aarch64::x8.Is(Arm64Assembler::reg_x(X8))); + EXPECT_TRUE(vixl::aarch64::x9.Is(Arm64Assembler::reg_x(X9))); + EXPECT_TRUE(vixl::aarch64::x10.Is(Arm64Assembler::reg_x(X10))); + EXPECT_TRUE(vixl::aarch64::x11.Is(Arm64Assembler::reg_x(X11))); + EXPECT_TRUE(vixl::aarch64::x12.Is(Arm64Assembler::reg_x(X12))); + EXPECT_TRUE(vixl::aarch64::x13.Is(Arm64Assembler::reg_x(X13))); + EXPECT_TRUE(vixl::aarch64::x14.Is(Arm64Assembler::reg_x(X14))); + EXPECT_TRUE(vixl::aarch64::x15.Is(Arm64Assembler::reg_x(X15))); + EXPECT_TRUE(vixl::aarch64::x16.Is(Arm64Assembler::reg_x(X16))); + EXPECT_TRUE(vixl::aarch64::x17.Is(Arm64Assembler::reg_x(X17))); + EXPECT_TRUE(vixl::aarch64::x18.Is(Arm64Assembler::reg_x(X18))); + EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(X19))); + EXPECT_TRUE(vixl::aarch64::x20.Is(Arm64Assembler::reg_x(X20))); + 
EXPECT_TRUE(vixl::aarch64::x21.Is(Arm64Assembler::reg_x(X21))); + EXPECT_TRUE(vixl::aarch64::x22.Is(Arm64Assembler::reg_x(X22))); + EXPECT_TRUE(vixl::aarch64::x23.Is(Arm64Assembler::reg_x(X23))); + EXPECT_TRUE(vixl::aarch64::x24.Is(Arm64Assembler::reg_x(X24))); + EXPECT_TRUE(vixl::aarch64::x25.Is(Arm64Assembler::reg_x(X25))); + EXPECT_TRUE(vixl::aarch64::x26.Is(Arm64Assembler::reg_x(X26))); + EXPECT_TRUE(vixl::aarch64::x27.Is(Arm64Assembler::reg_x(X27))); + EXPECT_TRUE(vixl::aarch64::x28.Is(Arm64Assembler::reg_x(X28))); + EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(X29))); + EXPECT_TRUE(vixl::aarch64::x30.Is(Arm64Assembler::reg_x(X30))); + + EXPECT_TRUE(vixl::aarch64::x19.Is(Arm64Assembler::reg_x(TR))); + EXPECT_TRUE(vixl::aarch64::ip0.Is(Arm64Assembler::reg_x(IP0))); + EXPECT_TRUE(vixl::aarch64::ip1.Is(Arm64Assembler::reg_x(IP1))); + EXPECT_TRUE(vixl::aarch64::x29.Is(Arm64Assembler::reg_x(FP))); + EXPECT_TRUE(vixl::aarch64::lr.Is(Arm64Assembler::reg_x(LR))); + EXPECT_TRUE(vixl::aarch64::sp.Is(Arm64Assembler::reg_x(SP))); + EXPECT_TRUE(vixl::aarch64::xzr.Is(Arm64Assembler::reg_x(XZR))); // W Registers. - EXPECT_TRUE(vixl::w0.Is(Arm64Assembler::reg_w(W0))); - EXPECT_TRUE(vixl::w1.Is(Arm64Assembler::reg_w(W1))); - EXPECT_TRUE(vixl::w2.Is(Arm64Assembler::reg_w(W2))); - EXPECT_TRUE(vixl::w3.Is(Arm64Assembler::reg_w(W3))); - EXPECT_TRUE(vixl::w4.Is(Arm64Assembler::reg_w(W4))); - EXPECT_TRUE(vixl::w5.Is(Arm64Assembler::reg_w(W5))); - EXPECT_TRUE(vixl::w6.Is(Arm64Assembler::reg_w(W6))); - EXPECT_TRUE(vixl::w7.Is(Arm64Assembler::reg_w(W7))); - EXPECT_TRUE(vixl::w8.Is(Arm64Assembler::reg_w(W8))); - EXPECT_TRUE(vixl::w9.Is(Arm64Assembler::reg_w(W9))); - EXPECT_TRUE(vixl::w10.Is(Arm64Assembler::reg_w(W10))); - EXPECT_TRUE(vixl::w11.Is(Arm64Assembler::reg_w(W11))); - EXPECT_TRUE(vixl::w12.Is(Arm64Assembler::reg_w(W12))); - EXPECT_TRUE(vixl::w13.Is(Arm64Assembler::reg_w(W13))); - EXPECT_TRUE(vixl::w14.Is(Arm64Assembler::reg_w(W14))); - EXPECT_TRUE(vixl::w15.Is(Arm64Assembler::reg_w(W15))); - EXPECT_TRUE(vixl::w16.Is(Arm64Assembler::reg_w(W16))); - EXPECT_TRUE(vixl::w17.Is(Arm64Assembler::reg_w(W17))); - EXPECT_TRUE(vixl::w18.Is(Arm64Assembler::reg_w(W18))); - EXPECT_TRUE(vixl::w19.Is(Arm64Assembler::reg_w(W19))); - EXPECT_TRUE(vixl::w20.Is(Arm64Assembler::reg_w(W20))); - EXPECT_TRUE(vixl::w21.Is(Arm64Assembler::reg_w(W21))); - EXPECT_TRUE(vixl::w22.Is(Arm64Assembler::reg_w(W22))); - EXPECT_TRUE(vixl::w23.Is(Arm64Assembler::reg_w(W23))); - EXPECT_TRUE(vixl::w24.Is(Arm64Assembler::reg_w(W24))); - EXPECT_TRUE(vixl::w25.Is(Arm64Assembler::reg_w(W25))); - EXPECT_TRUE(vixl::w26.Is(Arm64Assembler::reg_w(W26))); - EXPECT_TRUE(vixl::w27.Is(Arm64Assembler::reg_w(W27))); - EXPECT_TRUE(vixl::w28.Is(Arm64Assembler::reg_w(W28))); - EXPECT_TRUE(vixl::w29.Is(Arm64Assembler::reg_w(W29))); - EXPECT_TRUE(vixl::w30.Is(Arm64Assembler::reg_w(W30))); - EXPECT_TRUE(vixl::w31.Is(Arm64Assembler::reg_w(WZR))); - EXPECT_TRUE(vixl::wzr.Is(Arm64Assembler::reg_w(WZR))); - EXPECT_TRUE(vixl::wsp.Is(Arm64Assembler::reg_w(WSP))); + EXPECT_TRUE(vixl::aarch64::w0.Is(Arm64Assembler::reg_w(W0))); + EXPECT_TRUE(vixl::aarch64::w1.Is(Arm64Assembler::reg_w(W1))); + EXPECT_TRUE(vixl::aarch64::w2.Is(Arm64Assembler::reg_w(W2))); + EXPECT_TRUE(vixl::aarch64::w3.Is(Arm64Assembler::reg_w(W3))); + EXPECT_TRUE(vixl::aarch64::w4.Is(Arm64Assembler::reg_w(W4))); + EXPECT_TRUE(vixl::aarch64::w5.Is(Arm64Assembler::reg_w(W5))); + EXPECT_TRUE(vixl::aarch64::w6.Is(Arm64Assembler::reg_w(W6))); + 
EXPECT_TRUE(vixl::aarch64::w7.Is(Arm64Assembler::reg_w(W7))); + EXPECT_TRUE(vixl::aarch64::w8.Is(Arm64Assembler::reg_w(W8))); + EXPECT_TRUE(vixl::aarch64::w9.Is(Arm64Assembler::reg_w(W9))); + EXPECT_TRUE(vixl::aarch64::w10.Is(Arm64Assembler::reg_w(W10))); + EXPECT_TRUE(vixl::aarch64::w11.Is(Arm64Assembler::reg_w(W11))); + EXPECT_TRUE(vixl::aarch64::w12.Is(Arm64Assembler::reg_w(W12))); + EXPECT_TRUE(vixl::aarch64::w13.Is(Arm64Assembler::reg_w(W13))); + EXPECT_TRUE(vixl::aarch64::w14.Is(Arm64Assembler::reg_w(W14))); + EXPECT_TRUE(vixl::aarch64::w15.Is(Arm64Assembler::reg_w(W15))); + EXPECT_TRUE(vixl::aarch64::w16.Is(Arm64Assembler::reg_w(W16))); + EXPECT_TRUE(vixl::aarch64::w17.Is(Arm64Assembler::reg_w(W17))); + EXPECT_TRUE(vixl::aarch64::w18.Is(Arm64Assembler::reg_w(W18))); + EXPECT_TRUE(vixl::aarch64::w19.Is(Arm64Assembler::reg_w(W19))); + EXPECT_TRUE(vixl::aarch64::w20.Is(Arm64Assembler::reg_w(W20))); + EXPECT_TRUE(vixl::aarch64::w21.Is(Arm64Assembler::reg_w(W21))); + EXPECT_TRUE(vixl::aarch64::w22.Is(Arm64Assembler::reg_w(W22))); + EXPECT_TRUE(vixl::aarch64::w23.Is(Arm64Assembler::reg_w(W23))); + EXPECT_TRUE(vixl::aarch64::w24.Is(Arm64Assembler::reg_w(W24))); + EXPECT_TRUE(vixl::aarch64::w25.Is(Arm64Assembler::reg_w(W25))); + EXPECT_TRUE(vixl::aarch64::w26.Is(Arm64Assembler::reg_w(W26))); + EXPECT_TRUE(vixl::aarch64::w27.Is(Arm64Assembler::reg_w(W27))); + EXPECT_TRUE(vixl::aarch64::w28.Is(Arm64Assembler::reg_w(W28))); + EXPECT_TRUE(vixl::aarch64::w29.Is(Arm64Assembler::reg_w(W29))); + EXPECT_TRUE(vixl::aarch64::w30.Is(Arm64Assembler::reg_w(W30))); + EXPECT_TRUE(vixl::aarch64::w31.Is(Arm64Assembler::reg_w(WZR))); + EXPECT_TRUE(vixl::aarch64::wzr.Is(Arm64Assembler::reg_w(WZR))); + EXPECT_TRUE(vixl::aarch64::wsp.Is(Arm64Assembler::reg_w(WSP))); // D Registers. 
- EXPECT_TRUE(vixl::d0.Is(Arm64Assembler::reg_d(D0))); - EXPECT_TRUE(vixl::d1.Is(Arm64Assembler::reg_d(D1))); - EXPECT_TRUE(vixl::d2.Is(Arm64Assembler::reg_d(D2))); - EXPECT_TRUE(vixl::d3.Is(Arm64Assembler::reg_d(D3))); - EXPECT_TRUE(vixl::d4.Is(Arm64Assembler::reg_d(D4))); - EXPECT_TRUE(vixl::d5.Is(Arm64Assembler::reg_d(D5))); - EXPECT_TRUE(vixl::d6.Is(Arm64Assembler::reg_d(D6))); - EXPECT_TRUE(vixl::d7.Is(Arm64Assembler::reg_d(D7))); - EXPECT_TRUE(vixl::d8.Is(Arm64Assembler::reg_d(D8))); - EXPECT_TRUE(vixl::d9.Is(Arm64Assembler::reg_d(D9))); - EXPECT_TRUE(vixl::d10.Is(Arm64Assembler::reg_d(D10))); - EXPECT_TRUE(vixl::d11.Is(Arm64Assembler::reg_d(D11))); - EXPECT_TRUE(vixl::d12.Is(Arm64Assembler::reg_d(D12))); - EXPECT_TRUE(vixl::d13.Is(Arm64Assembler::reg_d(D13))); - EXPECT_TRUE(vixl::d14.Is(Arm64Assembler::reg_d(D14))); - EXPECT_TRUE(vixl::d15.Is(Arm64Assembler::reg_d(D15))); - EXPECT_TRUE(vixl::d16.Is(Arm64Assembler::reg_d(D16))); - EXPECT_TRUE(vixl::d17.Is(Arm64Assembler::reg_d(D17))); - EXPECT_TRUE(vixl::d18.Is(Arm64Assembler::reg_d(D18))); - EXPECT_TRUE(vixl::d19.Is(Arm64Assembler::reg_d(D19))); - EXPECT_TRUE(vixl::d20.Is(Arm64Assembler::reg_d(D20))); - EXPECT_TRUE(vixl::d21.Is(Arm64Assembler::reg_d(D21))); - EXPECT_TRUE(vixl::d22.Is(Arm64Assembler::reg_d(D22))); - EXPECT_TRUE(vixl::d23.Is(Arm64Assembler::reg_d(D23))); - EXPECT_TRUE(vixl::d24.Is(Arm64Assembler::reg_d(D24))); - EXPECT_TRUE(vixl::d25.Is(Arm64Assembler::reg_d(D25))); - EXPECT_TRUE(vixl::d26.Is(Arm64Assembler::reg_d(D26))); - EXPECT_TRUE(vixl::d27.Is(Arm64Assembler::reg_d(D27))); - EXPECT_TRUE(vixl::d28.Is(Arm64Assembler::reg_d(D28))); - EXPECT_TRUE(vixl::d29.Is(Arm64Assembler::reg_d(D29))); - EXPECT_TRUE(vixl::d30.Is(Arm64Assembler::reg_d(D30))); - EXPECT_TRUE(vixl::d31.Is(Arm64Assembler::reg_d(D31))); + EXPECT_TRUE(vixl::aarch64::d0.Is(Arm64Assembler::reg_d(D0))); + EXPECT_TRUE(vixl::aarch64::d1.Is(Arm64Assembler::reg_d(D1))); + EXPECT_TRUE(vixl::aarch64::d2.Is(Arm64Assembler::reg_d(D2))); + EXPECT_TRUE(vixl::aarch64::d3.Is(Arm64Assembler::reg_d(D3))); + EXPECT_TRUE(vixl::aarch64::d4.Is(Arm64Assembler::reg_d(D4))); + EXPECT_TRUE(vixl::aarch64::d5.Is(Arm64Assembler::reg_d(D5))); + EXPECT_TRUE(vixl::aarch64::d6.Is(Arm64Assembler::reg_d(D6))); + EXPECT_TRUE(vixl::aarch64::d7.Is(Arm64Assembler::reg_d(D7))); + EXPECT_TRUE(vixl::aarch64::d8.Is(Arm64Assembler::reg_d(D8))); + EXPECT_TRUE(vixl::aarch64::d9.Is(Arm64Assembler::reg_d(D9))); + EXPECT_TRUE(vixl::aarch64::d10.Is(Arm64Assembler::reg_d(D10))); + EXPECT_TRUE(vixl::aarch64::d11.Is(Arm64Assembler::reg_d(D11))); + EXPECT_TRUE(vixl::aarch64::d12.Is(Arm64Assembler::reg_d(D12))); + EXPECT_TRUE(vixl::aarch64::d13.Is(Arm64Assembler::reg_d(D13))); + EXPECT_TRUE(vixl::aarch64::d14.Is(Arm64Assembler::reg_d(D14))); + EXPECT_TRUE(vixl::aarch64::d15.Is(Arm64Assembler::reg_d(D15))); + EXPECT_TRUE(vixl::aarch64::d16.Is(Arm64Assembler::reg_d(D16))); + EXPECT_TRUE(vixl::aarch64::d17.Is(Arm64Assembler::reg_d(D17))); + EXPECT_TRUE(vixl::aarch64::d18.Is(Arm64Assembler::reg_d(D18))); + EXPECT_TRUE(vixl::aarch64::d19.Is(Arm64Assembler::reg_d(D19))); + EXPECT_TRUE(vixl::aarch64::d20.Is(Arm64Assembler::reg_d(D20))); + EXPECT_TRUE(vixl::aarch64::d21.Is(Arm64Assembler::reg_d(D21))); + EXPECT_TRUE(vixl::aarch64::d22.Is(Arm64Assembler::reg_d(D22))); + EXPECT_TRUE(vixl::aarch64::d23.Is(Arm64Assembler::reg_d(D23))); + EXPECT_TRUE(vixl::aarch64::d24.Is(Arm64Assembler::reg_d(D24))); + EXPECT_TRUE(vixl::aarch64::d25.Is(Arm64Assembler::reg_d(D25))); + 
EXPECT_TRUE(vixl::aarch64::d26.Is(Arm64Assembler::reg_d(D26))); + EXPECT_TRUE(vixl::aarch64::d27.Is(Arm64Assembler::reg_d(D27))); + EXPECT_TRUE(vixl::aarch64::d28.Is(Arm64Assembler::reg_d(D28))); + EXPECT_TRUE(vixl::aarch64::d29.Is(Arm64Assembler::reg_d(D29))); + EXPECT_TRUE(vixl::aarch64::d30.Is(Arm64Assembler::reg_d(D30))); + EXPECT_TRUE(vixl::aarch64::d31.Is(Arm64Assembler::reg_d(D31))); // S Registers. - EXPECT_TRUE(vixl::s0.Is(Arm64Assembler::reg_s(S0))); - EXPECT_TRUE(vixl::s1.Is(Arm64Assembler::reg_s(S1))); - EXPECT_TRUE(vixl::s2.Is(Arm64Assembler::reg_s(S2))); - EXPECT_TRUE(vixl::s3.Is(Arm64Assembler::reg_s(S3))); - EXPECT_TRUE(vixl::s4.Is(Arm64Assembler::reg_s(S4))); - EXPECT_TRUE(vixl::s5.Is(Arm64Assembler::reg_s(S5))); - EXPECT_TRUE(vixl::s6.Is(Arm64Assembler::reg_s(S6))); - EXPECT_TRUE(vixl::s7.Is(Arm64Assembler::reg_s(S7))); - EXPECT_TRUE(vixl::s8.Is(Arm64Assembler::reg_s(S8))); - EXPECT_TRUE(vixl::s9.Is(Arm64Assembler::reg_s(S9))); - EXPECT_TRUE(vixl::s10.Is(Arm64Assembler::reg_s(S10))); - EXPECT_TRUE(vixl::s11.Is(Arm64Assembler::reg_s(S11))); - EXPECT_TRUE(vixl::s12.Is(Arm64Assembler::reg_s(S12))); - EXPECT_TRUE(vixl::s13.Is(Arm64Assembler::reg_s(S13))); - EXPECT_TRUE(vixl::s14.Is(Arm64Assembler::reg_s(S14))); - EXPECT_TRUE(vixl::s15.Is(Arm64Assembler::reg_s(S15))); - EXPECT_TRUE(vixl::s16.Is(Arm64Assembler::reg_s(S16))); - EXPECT_TRUE(vixl::s17.Is(Arm64Assembler::reg_s(S17))); - EXPECT_TRUE(vixl::s18.Is(Arm64Assembler::reg_s(S18))); - EXPECT_TRUE(vixl::s19.Is(Arm64Assembler::reg_s(S19))); - EXPECT_TRUE(vixl::s20.Is(Arm64Assembler::reg_s(S20))); - EXPECT_TRUE(vixl::s21.Is(Arm64Assembler::reg_s(S21))); - EXPECT_TRUE(vixl::s22.Is(Arm64Assembler::reg_s(S22))); - EXPECT_TRUE(vixl::s23.Is(Arm64Assembler::reg_s(S23))); - EXPECT_TRUE(vixl::s24.Is(Arm64Assembler::reg_s(S24))); - EXPECT_TRUE(vixl::s25.Is(Arm64Assembler::reg_s(S25))); - EXPECT_TRUE(vixl::s26.Is(Arm64Assembler::reg_s(S26))); - EXPECT_TRUE(vixl::s27.Is(Arm64Assembler::reg_s(S27))); - EXPECT_TRUE(vixl::s28.Is(Arm64Assembler::reg_s(S28))); - EXPECT_TRUE(vixl::s29.Is(Arm64Assembler::reg_s(S29))); - EXPECT_TRUE(vixl::s30.Is(Arm64Assembler::reg_s(S30))); - EXPECT_TRUE(vixl::s31.Is(Arm64Assembler::reg_s(S31))); + EXPECT_TRUE(vixl::aarch64::s0.Is(Arm64Assembler::reg_s(S0))); + EXPECT_TRUE(vixl::aarch64::s1.Is(Arm64Assembler::reg_s(S1))); + EXPECT_TRUE(vixl::aarch64::s2.Is(Arm64Assembler::reg_s(S2))); + EXPECT_TRUE(vixl::aarch64::s3.Is(Arm64Assembler::reg_s(S3))); + EXPECT_TRUE(vixl::aarch64::s4.Is(Arm64Assembler::reg_s(S4))); + EXPECT_TRUE(vixl::aarch64::s5.Is(Arm64Assembler::reg_s(S5))); + EXPECT_TRUE(vixl::aarch64::s6.Is(Arm64Assembler::reg_s(S6))); + EXPECT_TRUE(vixl::aarch64::s7.Is(Arm64Assembler::reg_s(S7))); + EXPECT_TRUE(vixl::aarch64::s8.Is(Arm64Assembler::reg_s(S8))); + EXPECT_TRUE(vixl::aarch64::s9.Is(Arm64Assembler::reg_s(S9))); + EXPECT_TRUE(vixl::aarch64::s10.Is(Arm64Assembler::reg_s(S10))); + EXPECT_TRUE(vixl::aarch64::s11.Is(Arm64Assembler::reg_s(S11))); + EXPECT_TRUE(vixl::aarch64::s12.Is(Arm64Assembler::reg_s(S12))); + EXPECT_TRUE(vixl::aarch64::s13.Is(Arm64Assembler::reg_s(S13))); + EXPECT_TRUE(vixl::aarch64::s14.Is(Arm64Assembler::reg_s(S14))); + EXPECT_TRUE(vixl::aarch64::s15.Is(Arm64Assembler::reg_s(S15))); + EXPECT_TRUE(vixl::aarch64::s16.Is(Arm64Assembler::reg_s(S16))); + EXPECT_TRUE(vixl::aarch64::s17.Is(Arm64Assembler::reg_s(S17))); + EXPECT_TRUE(vixl::aarch64::s18.Is(Arm64Assembler::reg_s(S18))); + EXPECT_TRUE(vixl::aarch64::s19.Is(Arm64Assembler::reg_s(S19))); + 
EXPECT_TRUE(vixl::aarch64::s20.Is(Arm64Assembler::reg_s(S20))); + EXPECT_TRUE(vixl::aarch64::s21.Is(Arm64Assembler::reg_s(S21))); + EXPECT_TRUE(vixl::aarch64::s22.Is(Arm64Assembler::reg_s(S22))); + EXPECT_TRUE(vixl::aarch64::s23.Is(Arm64Assembler::reg_s(S23))); + EXPECT_TRUE(vixl::aarch64::s24.Is(Arm64Assembler::reg_s(S24))); + EXPECT_TRUE(vixl::aarch64::s25.Is(Arm64Assembler::reg_s(S25))); + EXPECT_TRUE(vixl::aarch64::s26.Is(Arm64Assembler::reg_s(S26))); + EXPECT_TRUE(vixl::aarch64::s27.Is(Arm64Assembler::reg_s(S27))); + EXPECT_TRUE(vixl::aarch64::s28.Is(Arm64Assembler::reg_s(S28))); + EXPECT_TRUE(vixl::aarch64::s29.Is(Arm64Assembler::reg_s(S29))); + EXPECT_TRUE(vixl::aarch64::s30.Is(Arm64Assembler::reg_s(S30))); + EXPECT_TRUE(vixl::aarch64::s31.Is(Arm64Assembler::reg_s(S31))); } } // namespace arm64 diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h deleted file mode 100644 index 5c33639a6a..0000000000 --- a/compiler/utils/array_ref.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARRAY_REF_H_ -#define ART_COMPILER_UTILS_ARRAY_REF_H_ - -#include <type_traits> -#include <vector> - -#include "base/logging.h" - -namespace art { - -/** - * @brief A container that references an array. - * - * @details The template class ArrayRef provides a container that references - * an external array. This external array must remain alive while the ArrayRef - * object is in use. The external array may be a std::vector<>-backed storage - * or any other contiguous chunk of memory but that memory must remain valid, - * i.e. the std::vector<> must not be resized for example. - * - * Except for copy/assign and insert/erase/capacity functions, the interface - * is essentially the same as std::vector<>. Since we don't want to throw - * exceptions, at() is also excluded. - */ -template <typename T> -class ArrayRef { - private: - struct tag { }; - - public: - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - typedef T* iterator; - typedef const T* const_iterator; - typedef std::reverse_iterator<iterator> reverse_iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - - // Constructors. 
- - constexpr ArrayRef() - : array_(nullptr), size_(0u) { - } - - template <size_t size> - explicit constexpr ArrayRef(T (&array)[size]) - : array_(array), size_(size) { - } - - template <typename U, size_t size> - explicit constexpr ArrayRef(U (&array)[size], - typename std::enable_if<std::is_same<T, const U>::value, tag>::type - t ATTRIBUTE_UNUSED = tag()) - : array_(array), size_(size) { - } - - constexpr ArrayRef(T* array_in, size_t size_in) - : array_(array_in), size_(size_in) { - } - - template <typename Vector, - typename = typename std::enable_if< - std::is_same<typename Vector::value_type, value_type>::value>::type> - explicit ArrayRef(Vector& v) - : array_(v.data()), size_(v.size()) { - } - - template <typename Vector, - typename = typename std::enable_if< - std::is_same< - typename std::add_const<typename Vector::value_type>::type, - value_type>::value>::type> - explicit ArrayRef(const Vector& v) - : array_(v.data()), size_(v.size()) { - } - - ArrayRef(const ArrayRef&) = default; - - // Assignment operators. - - ArrayRef& operator=(const ArrayRef& other) { - array_ = other.array_; - size_ = other.size_; - return *this; - } - - template <typename U> - typename std::enable_if<std::is_same<T, const U>::value, ArrayRef>::type& - operator=(const ArrayRef<U>& other) { - return *this = ArrayRef(other); - } - - // Destructor. - ~ArrayRef() = default; - - // Iterators. - iterator begin() { return array_; } - const_iterator begin() const { return array_; } - const_iterator cbegin() const { return array_; } - iterator end() { return array_ + size_; } - const_iterator end() const { return array_ + size_; } - const_iterator cend() const { return array_ + size_; } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } - - // Size. - size_type size() const { return size_; } - bool empty() const { return size() == 0u; } - - // Element access. NOTE: Not providing at(). 
- - reference operator[](size_type n) { - DCHECK_LT(n, size_); - return array_[n]; - } - - const_reference operator[](size_type n) const { - DCHECK_LT(n, size_); - return array_[n]; - } - - reference front() { - DCHECK_NE(size_, 0u); - return array_[0]; - } - - const_reference front() const { - DCHECK_NE(size_, 0u); - return array_[0]; - } - - reference back() { - DCHECK_NE(size_, 0u); - return array_[size_ - 1u]; - } - - const_reference back() const { - DCHECK_NE(size_, 0u); - return array_[size_ - 1u]; - } - - value_type* data() { return array_; } - const value_type* data() const { return array_; } - - ArrayRef SubArray(size_type pos) const { - return SubArray(pos, size_ - pos); - } - ArrayRef SubArray(size_type pos, size_type length) const { - DCHECK_LE(pos, size()); - DCHECK_LE(length, size() - pos); - return ArrayRef(array_ + pos, length); - } - - private: - T* array_; - size_t size_; -}; - -template <typename T> -bool operator==(const ArrayRef<T>& lhs, const ArrayRef<T>& rhs) { - return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -template <typename T> -bool operator!=(const ArrayRef<T>& lhs, const ArrayRef<T>& rhs) { - return !(lhs == rhs); -} - -} // namespace art - - -#endif // ART_COMPILER_UTILS_ARRAY_REF_H_ diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index e6c3a18d04..57f3b1570a 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -20,7 +20,6 @@ #include <vector> #ifdef ART_ENABLE_CODEGEN_arm -#include "arm/assembler_arm32.h" #include "arm/assembler_thumb2.h" #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -121,133 +120,4 @@ void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() { } } -std::unique_ptr<Assembler> Assembler::Create( - ArenaAllocator* arena, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features) { - switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_arm - case kArm: - return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena)); - case kThumb2: - return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena)); -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - case kArm64: - return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena)); -#endif -#ifdef ART_ENABLE_CODEGEN_mips - case kMips: - return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler( - arena, - instruction_set_features != nullptr - ? 
instruction_set_features->AsMipsInstructionSetFeatures() - : nullptr)); -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case kMips64: - return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena)); -#endif -#ifdef ART_ENABLE_CODEGEN_x86 - case kX86: - return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena)); -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 - case kX86_64: - return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena)); -#endif - default: - LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; - return nullptr; - } -} - -void Assembler::StoreImmediateToThread32(ThreadOffset<4> dest ATTRIBUTE_UNUSED, - uint32_t imm ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::StoreImmediateToThread64(ThreadOffset<8> dest ATTRIBUTE_UNUSED, - uint32_t imm ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED, - FrameOffset fr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED, - FrameOffset fr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::LoadFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED, - ThreadOffset<4> src ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::LoadFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED, - ThreadOffset<8> src ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::LoadRawPtrFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED, - ThreadOffset<4> offs ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::LoadRawPtrFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED, - ThreadOffset<8> offs ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs ATTRIBUTE_UNUSED, - ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs ATTRIBUTE_UNUSED, - ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs ATTRIBUTE_UNUSED, - FrameOffset fr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs ATTRIBUTE_UNUSED, - FrameOffset fr_offs ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CallFromThread32(ThreadOffset<4> offset ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - -void Assembler::CallFromThread64(ThreadOffset<8> offset ATTRIBUTE_UNUSED, - ManagedRegister scratch ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL); -} - } // namespace art diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 96da03d082..314ff8cf7a 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -24,6 +24,8 
@@ #include "arm/constants_arm.h" #include "base/arena_allocator.h" #include "base/arena_object.h" +#include "base/array_ref.h" +#include "base/enums.h" #include "base/logging.h" #include "base/macros.h" #include "debug/dwarf/debug_frame_opcode_writer.h" @@ -311,8 +313,10 @@ class DebugFrameOpCodeWriterForAssembler FINAL // Override the last delayed PC. The new PC can be out of order. void OverrideDelayedPC(size_t pc) { DCHECK(delay_emitting_advance_pc_); - DCHECK(!delayed_advance_pcs_.empty()); - delayed_advance_pcs_.back().pc = pc; + if (enabled_) { + DCHECK(!delayed_advance_pcs_.empty()); + delayed_advance_pcs_.back().pc = pc; + } } // Return the number of delayed advance PC entries. @@ -352,17 +356,22 @@ class DebugFrameOpCodeWriterForAssembler FINAL class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { public: - static std::unique_ptr<Assembler> Create( - ArenaAllocator* arena, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features = nullptr); - // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } virtual const uint8_t* CodeBufferBaseAddress() const { return buffer_.contents(); } + // CodePosition() is a non-const method similar to CodeSize(), which is used to + // record positions within the code buffer for the purpose of signal handling + // (stack overflow checks and implicit null checks may trigger signals and the + // signal handlers expect them right before the recorded positions). + // On most architectures CodePosition() should be equivalent to CodeSize(), but + // the MIPS assembler needs to be aware of this recording, so it doesn't put + // the instructions that can trigger signals into branch delay slots. Handling + // signals from instructions in delay slots is a bit problematic and should be + // avoided. + virtual size_t CodePosition() { return CodeSize(); } // Copy instructions out of assembly buffer into the given region of memory virtual void FinalizeInstructions(const MemoryRegion& region) { @@ -372,140 +381,6 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { // TODO: Implement with disassembler. virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) 
{} - // Emit code that will create an activation on the stack - virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) = 0; - - // Emit code that will remove an activation from the stack - virtual void RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& callee_save_regs) = 0; - - virtual void IncreaseFrameSize(size_t adjust) = 0; - virtual void DecreaseFrameSize(size_t adjust) = 0; - - // Store routines - virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0; - virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0; - virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0; - - virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister scratch) = 0; - - virtual void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, - ManagedRegister scratch); - virtual void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister scratch); - - virtual void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch); - virtual void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, - FrameOffset fr_offs, - ManagedRegister scratch); - - virtual void StoreStackPointerToThread32(ThreadOffset<4> thr_offs); - virtual void StoreStackPointerToThread64(ThreadOffset<8> thr_offs); - - virtual void StoreSpanning(FrameOffset dest, ManagedRegister src, - FrameOffset in_off, ManagedRegister scratch) = 0; - - // Load routines - virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; - - virtual void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size); - virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size); - - virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0; - // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference. 
- virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) = 0; - - virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0; - - virtual void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs); - virtual void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs); - - // Copying routines - virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0; - - virtual void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs, - ManagedRegister scratch); - virtual void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, - ManagedRegister scratch); - - virtual void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch); - virtual void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch); - - virtual void CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister scratch) = 0; - - virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0; - - virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, - ManagedRegister scratch, size_t size) = 0; - - virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister scratch, size_t size) = 0; - - virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, - ManagedRegister scratch, size_t size) = 0; - - virtual void Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) = 0; - - virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) = 0; - - virtual void MemoryBarrier(ManagedRegister scratch) = 0; - - // Sign extension - virtual void SignExtend(ManagedRegister mreg, size_t size) = 0; - - // Zero extension - virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0; - - // Exploit fast access in managed code to Thread::Current() - virtual void GetCurrentThread(ManagedRegister tr) = 0; - virtual void GetCurrentThread(FrameOffset dest_offset, - ManagedRegister scratch) = 0; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - virtual void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) = 0; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - virtual void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) = 0; - - // src holds a handle scope entry (Object**) load this into dst - virtual void LoadReferenceFromHandleScope(ManagedRegister dst, - ManagedRegister src) = 0; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. 
- virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0; - virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0; - - // Call to address held at [base+offset] - virtual void Call(ManagedRegister base, Offset offset, - ManagedRegister scratch) = 0; - virtual void Call(FrameOffset base, Offset offset, - ManagedRegister scratch) = 0; - virtual void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch); - virtual void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch); - - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0; - virtual void Bind(Label* label) = 0; virtual void Jump(Label* label) = 0; @@ -517,13 +392,17 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { */ DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; } - protected: - explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {} - ArenaAllocator* GetArena() { return buffer_.GetArena(); } + AssemblerBuffer* GetBuffer() { + return &buffer_; + } + + protected: + explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {} + AssemblerBuffer buffer_; DebugFrameOpCodeWriterForAssembler cfi_; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 084e9011ba..f655994bd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -42,7 +42,10 @@ enum class RegisterView { // private kUseQuaternaryName, }; -template<typename Ass, typename Reg, typename FPReg, typename Imm> +// For use in the template as the default type to get a nonvector registers version. +struct NoVectorRegs {}; + +template<typename Ass, typename Reg, typename FPReg, typename Imm, typename VecReg = NoVectorRegs> class AssemblerTest : public testing::Test { public: Ass* GetAssembler() { @@ -51,30 +54,30 @@ class AssemblerTest : public testing::Test { typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler); - void DriverFn(TestFn f, std::string test_name) { + void DriverFn(TestFn f, const std::string& test_name) { DriverWrapper(f(this, assembler_.get()), test_name); } // This driver assumes the assembler has already been called. 
- void DriverStr(std::string assembly_string, std::string test_name) { + void DriverStr(const std::string& assembly_string, const std::string& test_name) { DriverWrapper(assembly_string, test_name); } - std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) { + std::string RepeatR(void (Ass::*f)(Reg), const std::string& fmt) { return RepeatTemplatedRegister<Reg>(f, GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, fmt); } - std::string Repeatr(void (Ass::*f)(Reg), std::string fmt) { + std::string Repeatr(void (Ass::*f)(Reg), const std::string& fmt) { return RepeatTemplatedRegister<Reg>(f, GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>, fmt); } - std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) { + std::string RepeatRR(void (Ass::*f)(Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -83,7 +86,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) { + std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -92,7 +95,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) { + std::string Repeatrr(void (Ass::*f)(Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -101,7 +104,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) { + std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -112,7 +115,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) { + std::string Repeatrb(void (Ass::*f)(Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -121,7 +124,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) { + std::string RepeatRr(void (Ass::*f)(Reg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), GetRegisters(), @@ -130,11 +133,11 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) { + std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) { return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt); } - std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) { + std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) { return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt); } @@ -145,7 +148,9 @@ class AssemblerTest : public testing::Test { const std::vector<Reg2*> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), - std::string fmt) { + const std::string& fmt, + int bias = 0, + int multiplier = 1) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); @@ -153,7 +158,7 @@ class AssemblerTest : public testing::Test { for (auto reg2 : reg2_registers) { for 
(int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg1, *reg2, new_imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); std::string base = fmt; std::string reg1_string = (this->*GetName1)(*reg1); @@ -171,7 +176,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm; + sreg << imm * multiplier + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -188,6 +193,67 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2, typename Reg3, typename ImmType> + std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType), + int imm_bits, + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + const std::vector<Reg3*> reg3_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string fmt, + int bias) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + std::string reg3_string = (this->*GetName3)(*reg3); + size_t reg3_index; + while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { + base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm + bias; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + } + // Add a newline at the end. 
+ str += "\n"; + return str; + } + template <typename ImmType, typename Reg1, typename Reg2> std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), const std::vector<Reg1*> reg1_registers, @@ -195,7 +261,7 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), int imm_bits, - std::string fmt) { + const std::string& fmt) { std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); @@ -243,16 +309,17 @@ class AssemblerTest : public testing::Test { template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<Reg*> registers, + const std::vector<RegType*> registers, std::string (AssemblerTest::*GetName)(const RegType&), - std::string fmt) { + const std::string& fmt, + int bias) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); for (auto reg : registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg, new_imm); + (assembler_.get()->*f)(*reg, new_imm + bias); std::string base = fmt; std::string reg_string = (this->*GetName)(*reg); @@ -264,7 +331,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm; + sreg << imm + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -281,37 +348,63 @@ class AssemblerTest : public testing::Test { } template <typename ImmType> - std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) { + std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f, imm_bits, GetRegisters(), GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatRRRIb(void (Ass::*f)(Reg, Reg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<Reg, Reg, Reg, ImmType>(f, + imm_bits, + GetRegisters(), + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); } template <typename ImmType> - std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) { + std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt, int bias = 0) { return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f, imm_bits, GetRegisters(), &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); } template <typename ImmType> - std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) { + std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f, imm_bits, GetFPRegisters(), GetRegisters(), &AssemblerTest::GetFPRegName, 
&AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, - fmt); + fmt, + bias); } - std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) { + std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, FPReg>(f, GetFPRegisters(), GetFPRegisters(), @@ -320,7 +413,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) { + std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f, GetFPRegisters(), GetFPRegisters(), @@ -331,9 +424,21 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) { + return RepeatTemplatedRegisters<FPReg, FPReg, Reg>( + f, + GetFPRegisters(), + GetFPRegisters(), + GetRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, - std::string fmt) { + const std::string& fmt) { return RepeatTemplatedRegistersImm<FPReg, FPReg>(f, GetFPRegisters(), GetFPRegisters(), @@ -344,7 +449,22 @@ class AssemblerTest : public testing::Test { } template <typename ImmType> - std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) { + std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), + int imm_bits, + const std::string& fmt) { + return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f, + imm_bits, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + + template <typename ImmType> + std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), + int imm_bits, + const std::string& fmt) { return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f, GetFPRegisters(), GetFPRegisters(), @@ -354,7 +474,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), GetRegisters(), @@ -363,7 +483,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatFr(void (Ass::*f)(FPReg, Reg), std::string fmt) { + std::string RepeatFr(void (Ass::*f)(FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), GetRegisters(), @@ -372,7 +492,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatRF(void (Ass::*f)(Reg, FPReg), std::string fmt) { + std::string RepeatRF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, FPReg>(f, GetRegisters(), GetFPRegisters(), @@ -381,7 +501,7 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatrF(void (Ass::*f)(Reg, FPReg), std::string fmt) { + std::string RepeatrF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) { return RepeatTemplatedRegisters<Reg, FPReg>(f, GetRegisters(), GetFPRegisters(), @@ -390,7 +510,9 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt, + std::string RepeatI(void (Ass::*f)(const Imm&), + size_t imm_bytes, + const std::string& fmt, bool as_uint = false) { std::string str; std::vector<int64_t> imms = 
CreateImmediateValues(imm_bytes, as_uint); @@ -420,6 +542,82 @@ class AssemblerTest : public testing::Test { return str; } + std::string RepeatVV(void (Ass::*f)(VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVVV(void (Ass::*f)(VecReg, VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVR(void (Ass::*f)(VecReg, Reg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, Reg>( + f, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + + template <typename ImmType> + std::string RepeatVIb(void (Ass::*f)(VecReg, ImmType), + int imm_bits, + std::string fmt, + int bias = 0) { + return RepeatTemplatedRegisterImmBits<VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + return RepeatTemplatedRegistersImmBits<VecReg, Reg, ImmType>( + f, + imm_bits, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias, + multiplier); + } + + template <typename ImmType> + std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<VecReg, VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + // This is intended to be run as a test. bool CheckTools() { return test_helper_->CheckTools(); @@ -434,6 +632,11 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + virtual std::vector<VecReg*> GetVectorRegisters() { + UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; + UNREACHABLE(); + } + // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; @@ -461,7 +664,7 @@ class AssemblerTest : public testing::Test { void SetUp() OVERRIDE { arena_.reset(new ArenaAllocator(&pool_)); - assembler_.reset(new (arena_.get()) Ass(arena_.get())); + assembler_.reset(CreateAssembler(arena_.get())); test_helper_.reset( new AssemblerTestInfrastructure(GetArchitectureString(), GetAssemblerCmdName(), @@ -481,6 +684,11 @@ class AssemblerTest : public testing::Test { arena_.reset(); } + // Override this to set up any architecture-specific things, e.g., CPU revision. + virtual Ass* CreateAssembler(ArenaAllocator* arena) { + return new (arena) Ass(arena); + } + // Override this to set up any architecture-specific things, e.g., register vectors. 
virtual void SetUpHelpers() {} @@ -623,7 +831,7 @@ class AssemblerTest : public testing::Test { std::string RepeatTemplatedRegister(void (Ass::*f)(RegType), const std::vector<RegType*> registers, std::string (AssemblerTest::*GetName)(const RegType&), - std::string fmt) { + const std::string& fmt) { std::string str; for (auto reg : registers) { (assembler_.get()->*f)(*reg); @@ -651,7 +859,7 @@ class AssemblerTest : public testing::Test { const std::vector<Reg2*> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), - std::string fmt) { + const std::string& fmt) { WarnOnCombinations(reg1_registers.size() * reg2_registers.size()); std::string str; @@ -689,7 +897,7 @@ class AssemblerTest : public testing::Test { const std::vector<Reg2*> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), - std::string fmt) { + const std::string& fmt) { WarnOnCombinations(reg1_registers.size() * reg2_registers.size()); std::string str; @@ -730,7 +938,7 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), - std::string fmt) { + const std::string& fmt) { std::string str; for (auto reg1 : reg1_registers) { for (auto reg2 : reg2_registers) { @@ -775,7 +983,7 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), size_t imm_bytes, - std::string fmt) { + const std::string& fmt) { std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); @@ -848,6 +1056,12 @@ class AssemblerTest : public testing::Test { return sreg.str(); } + std::string GetVecRegName(const VecReg& reg) { + std::ostringstream sreg; + sreg << reg; + return sreg.str(); + } + // If the assembly file needs a header, return it in a sub-class. virtual const char* GetAssemblyHeader() { return nullptr; @@ -867,8 +1081,9 @@ class AssemblerTest : public testing::Test { private: template <RegisterView kRegView> - std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, - std::string fmt) { + std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), + size_t imm_bytes, + const std::string& fmt) { const std::vector<Reg*> registers = GetRegisters(); std::string str; std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); @@ -910,7 +1125,7 @@ class AssemblerTest : public testing::Test { virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { } - void DriverWrapper(std::string assembly_text, std::string test_name) { + void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 8c71292465..d76cb1c1df 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -23,7 +23,10 @@ #include <iterator> #include <sys/stat.h> +#include "android-base/strings.h" + #include "common_runtime_test.h" // For ScratchFile +#include "exec_utils.h" #include "utils.h" namespace art { @@ -106,7 +109,9 @@ class AssemblerTestInfrastructure { // Driver() assembles and compares the results. 
If the results are not equal and we have a // disassembler, disassemble both and check whether they have the same mnemonics (in which case // we just warn). - void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) { + void Driver(const std::vector<uint8_t>& data, + const std::string& assembly_text, + const std::string& test_name) { EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly"; NativeAssemblerResult res; @@ -219,7 +224,7 @@ class AssemblerTestInfrastructure { args.push_back("-o"); args.push_back(to_file); args.push_back(from_file); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -229,7 +234,7 @@ class AssemblerTestInfrastructure { bool success = Exec(args, error_msg); if (!success) { LOG(ERROR) << "Assembler command line:"; - for (std::string arg : args) { + for (const std::string& arg : args) { LOG(ERROR) << arg; } } @@ -238,7 +243,7 @@ class AssemblerTestInfrastructure { // Runs objdump -h on the binary file and extracts the first line with .text. // Returns "" on failure. - std::string Objdump(std::string file) { + std::string Objdump(const std::string& file) { bool have_objdump = FileExists(FindTool(objdump_cmd_name_)); EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand(); if (!have_objdump) { @@ -255,7 +260,7 @@ class AssemblerTestInfrastructure { args.push_back(file); args.push_back(">"); args.push_back(file+".dump"); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -287,8 +292,9 @@ class AssemblerTestInfrastructure { } // Disassemble both binaries and compare the text. - bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as, - std::string test_name) { + bool DisassembleBinaries(const std::vector<uint8_t>& data, + const std::vector<uint8_t>& as, + const std::string& test_name) { std::string disassembler = GetDisassembleCommand(); if (disassembler.length() == 0) { LOG(WARNING) << "No dissassembler command."; @@ -324,7 +330,7 @@ class AssemblerTestInfrastructure { return result; } - bool DisassembleBinary(std::string file, std::string* error_msg) { + bool DisassembleBinary(const std::string& file, std::string* error_msg) { std::vector<std::string> args; // Encaspulate the whole command line in a single string passed to @@ -335,7 +341,7 @@ class AssemblerTestInfrastructure { args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'"); args.push_back(">"); args.push_back(file+".dis"); - std::string cmd = Join(args, ' '); + std::string cmd = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -345,7 +351,7 @@ class AssemblerTestInfrastructure { return Exec(args, error_msg); } - std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) { + std::string WriteToFile(const std::vector<uint8_t>& buffer, const std::string& test_name) { std::string file_name = GetTmpnam() + std::string("---") + test_name; const char* data = reinterpret_cast<const char*>(buffer.data()); std::ofstream s_out(file_name + ".o"); @@ -354,7 +360,7 @@ class AssemblerTestInfrastructure { return file_name + ".o"; } - bool CompareFiles(std::string f1, std::string f2) { + bool CompareFiles(const std::string& f1, const std::string& f2) { std::ifstream f1_in(f1); std::ifstream f2_in(f2); @@ -369,7 +375,9 @@ class AssemblerTestInfrastructure { } // Compile the given assembly code and extract the 
binary, if possible. Put result into res. - bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) { + bool Compile(const std::string& assembly_code, + NativeAssemblerResult* res, + const std::string& test_name) { res->ok = false; res->code.reset(nullptr); @@ -438,7 +446,7 @@ class AssemblerTestInfrastructure { // Check whether file exists. Is used for commands, so strips off any parameters: anything after // the first space. We skip to the last slash for this, so it should work with directories with // spaces. - static bool FileExists(std::string file) { + static bool FileExists(const std::string& file) { if (file.length() == 0) { return false; } @@ -478,7 +486,7 @@ class AssemblerTestInfrastructure { return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string(""); } - std::string FindTool(std::string tool_name) { + std::string FindTool(const std::string& tool_name) { // Find the current tool. Wild-card pattern is "arch-string*tool-name". std::string gcc_path = GetRootPath() + GetGCCRootPath(); std::vector<std::string> args; @@ -495,7 +503,7 @@ class AssemblerTestInfrastructure { std::string tmp_file = GetTmpnam(); args.push_back(">"); args.push_back(tmp_file); - std::string sh_args = Join(args, ' '); + std::string sh_args = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -522,7 +530,8 @@ class AssemblerTestInfrastructure { // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the // "-name" option. - static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) { + static void FindToolDumpPrintout(const std::string& name_predicate, + const std::string& tmp_file) { std::string gcc_path = GetRootPath() + GetGCCRootPath(); std::vector<std::string> args; args.push_back("find"); @@ -535,7 +544,7 @@ class AssemblerTestInfrastructure { args.push_back("sort"); args.push_back(">"); args.push_back(tmp_file); - std::string sh_args = Join(args, ' '); + std::string sh_args = android::base::Join(args, ' '); args.clear(); args.push_back("/bin/sh"); @@ -562,7 +571,7 @@ class AssemblerTestInfrastructure { } // For debug purposes. - void FindToolDump(std::string tool_name) { + void FindToolDump(const std::string& tool_name) { // Check with the tool name. FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam()); FindToolDumpPrintout("", GetTmpnam()); diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index c67cb5a563..4e9b619979 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -23,6 +23,10 @@ #include "gtest/gtest.h" #include "utils/arm/assembler_thumb2.h" + +#include "jni/quick/calling_convention.h" +#include "utils/arm/jni_macro_assembler_arm_vixl.h" + #include "base/hex_dump.h" #include "common_runtime_test.h" @@ -32,7 +36,7 @@ namespace arm { // Include results file (generated manually) #include "assembler_thumb_test_expected.cc.inc" -#ifndef __ANDROID__ +#ifndef ART_TARGET_ANDROID // This controls whether the results are printed to the // screen or compared against the expected output. // To generate new expected output, set this to true and @@ -72,7 +76,7 @@ void InitResults() { } std::string GetToolsDir() { -#ifndef __ANDROID__ +#ifndef ART_TARGET_ANDROID // This will only work on the host. There is no as, objcopy or objdump on the device. 
static std::string toolsdir; @@ -89,7 +93,7 @@ std::string GetToolsDir() { } void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) { -#ifndef __ANDROID__ +#ifndef ART_TARGET_ANDROID static std::string toolsdir = GetToolsDir(); ScratchFile file; @@ -154,7 +158,7 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* } if (CompareIgnoringSpace(results[lineindex], testline) != 0) { LOG(FATAL) << "Output is not as expected at line: " << lineindex - << results[lineindex] << "/" << testline; + << results[lineindex] << "/" << testline << ", test name: " << testname; } ++lineindex; } @@ -169,7 +173,7 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* snprintf(buf, sizeof(buf), "%s.oo", filename); unlink(buf); -#endif +#endif // ART_TARGET_ANDROID } #define __ assembler-> @@ -1241,22 +1245,6 @@ TEST_F(Thumb2AssemblerTest, LoadStoreRegOffset) { EmitAndCheck(&assembler, "LoadStoreRegOffset"); } -TEST_F(Thumb2AssemblerTest, LoadStoreLiteral) { - __ ldr(R0, Address(4)); - __ str(R0, Address(4)); - - __ ldr(R0, Address(-8)); - __ str(R0, Address(-8)); - - // Limits. - __ ldr(R0, Address(0x3ff)); // 10 bits (16 bit). - __ ldr(R0, Address(0x7ff)); // 11 bits (32 bit). - __ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)). - __ str(R0, Address(0x7ff)); // 11 bits (32 bit). - - EmitAndCheck(&assembler, "LoadStoreLiteral"); -} - TEST_F(Thumb2AssemblerTest, LoadStoreLimits) { __ ldr(R0, Address(R4, 124)); // 16 bit. __ ldr(R0, Address(R4, 128)); // 32 bit. @@ -1608,6 +1596,213 @@ TEST_F(Thumb2AssemblerTest, CmpConstant) { EmitAndCheck(&assembler, "CmpConstant"); } +#define ENABLE_VIXL_TEST + +#ifdef ENABLE_VIXL_TEST + +#define ARM_VIXL + +#ifdef ARM_VIXL +typedef arm::ArmVIXLJNIMacroAssembler JniAssemblerType; +#else +typedef arm::Thumb2Assembler AssemblerType; +#endif + +class ArmVIXLAssemblerTest : public ::testing::Test { + public: + ArmVIXLAssemblerTest() : pool(), arena(&pool), assembler(&arena) { } + + ArenaPool pool; + ArenaAllocator arena; + JniAssemblerType assembler; +}; + +#undef __ +#define __ assembler-> + +void EmitAndCheck(JniAssemblerType* assembler, const char* testname, + const char* const* results) { + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::vector<uint8_t> managed_code(cs); + MemoryRegion code(&managed_code[0], managed_code.size()); + __ FinalizeInstructions(code); + + DumpAndCheck(managed_code, testname, results); +} + +void EmitAndCheck(JniAssemblerType* assembler, const char* testname) { + InitResults(); + std::map<std::string, const char* const*>::iterator results = test_results.find(testname); + ASSERT_NE(results, test_results.end()); + + EmitAndCheck(assembler, testname, results->second); +} + +#undef __ +#define __ assembler. 
+ +TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { + const bool is_static = true; + const bool is_synchronized = false; + const bool is_critical_native = false; + const char* shorty = "IIFII"; + + ArenaPool pool; + ArenaAllocator arena(&pool); + + std::unique_ptr<JniCallingConvention> jni_conv( + JniCallingConvention::Create(&arena, + is_static, + is_synchronized, + is_critical_native, + shorty, + kThumb2)); + std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv( + ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, kThumb2)); + const int frame_size(jni_conv->FrameSize()); + ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters(); + + const ManagedRegister method_register = ArmManagedRegister::FromCoreRegister(R0); + const ManagedRegister scratch_register = ArmManagedRegister::FromCoreRegister(R12); + + __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills()); + __ IncreaseFrameSize(32); + + // Loads + __ IncreaseFrameSize(4096); + __ Load(method_register, FrameOffset(32), 4); + __ Load(method_register, FrameOffset(124), 4); + __ Load(method_register, FrameOffset(132), 4); + __ Load(method_register, FrameOffset(1020), 4); + __ Load(method_register, FrameOffset(1024), 4); + __ Load(scratch_register, FrameOffset(4092), 4); + __ Load(scratch_register, FrameOffset(4096), 4); + __ LoadRawPtrFromThread(scratch_register, ThreadOffset32(512)); + __ LoadRef(method_register, scratch_register, MemberOffset(128), /* unpoison_reference */ false); + + // Stores + __ Store(FrameOffset(32), method_register, 4); + __ Store(FrameOffset(124), method_register, 4); + __ Store(FrameOffset(132), method_register, 4); + __ Store(FrameOffset(1020), method_register, 4); + __ Store(FrameOffset(1024), method_register, 4); + __ Store(FrameOffset(4092), scratch_register, 4); + __ Store(FrameOffset(4096), scratch_register, 4); + __ StoreImmediateToFrame(FrameOffset(48), 0xFF, scratch_register); + __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF, scratch_register); + __ StoreRawPtr(FrameOffset(48), scratch_register); + __ StoreRef(FrameOffset(48), scratch_register); + __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48), scratch_register); + __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096), scratch_register); + __ StoreStackPointerToThread(ThreadOffset32(512)); + + // Other + __ Call(method_register, FrameOffset(48), scratch_register); + __ Copy(FrameOffset(48), FrameOffset(44), scratch_register, 4); + __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512), scratch_register); + __ CopyRef(FrameOffset(48), FrameOffset(44), scratch_register); + __ GetCurrentThread(method_register); + __ GetCurrentThread(FrameOffset(48), scratch_register); + __ Move(scratch_register, method_register, 4); + __ VerifyObject(scratch_register, false); + + __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, true); + __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, false); + __ CreateHandleScopeEntry(method_register, FrameOffset(48), scratch_register, true); + __ CreateHandleScopeEntry(FrameOffset(48), FrameOffset(64), scratch_register, true); + __ CreateHandleScopeEntry(method_register, FrameOffset(0), scratch_register, true); + __ CreateHandleScopeEntry(method_register, FrameOffset(1025), scratch_register, true); + __ CreateHandleScopeEntry(scratch_register, FrameOffset(1025), scratch_register, true); + + __ ExceptionPoll(scratch_register, 0); + + // 
Push the target out of range of branch emitted by ExceptionPoll. + for (int i = 0; i < 64; i++) { + __ Store(FrameOffset(2047), scratch_register, 4); + } + + __ DecreaseFrameSize(4096); + __ DecreaseFrameSize(32); + __ RemoveFrame(frame_size, callee_save_regs); + + EmitAndCheck(&assembler, "VixlJniHelpers"); +} + +#ifdef ARM_VIXL +#define R0 vixl::aarch32::r0 +#define R2 vixl::aarch32::r2 +#define R4 vixl::aarch32::r4 +#define R12 vixl::aarch32::r12 +#undef __ +#define __ assembler.asm_. +#endif + +TEST_F(ArmVIXLAssemblerTest, VixlLoadFromOffset) { + __ LoadFromOffset(kLoadWord, R2, R4, 12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xfff); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadWord, R2, R4, 0x101000); + __ LoadFromOffset(kLoadWord, R4, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000); + __ LoadFromOffset(kLoadWordPair, R2, R4, 12); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400); + __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400); + + vixl::aarch32::UseScratchRegisterScope temps(assembler.asm_.GetVIXLAssembler()); + temps.Exclude(R12); + __ LoadFromOffset(kLoadWord, R0, R12, 12); // 32-bit because of R12. + temps.Include(R12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000); + + __ LoadFromOffset(kLoadSignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12); + + EmitAndCheck(&assembler, "VixlLoadFromOffset"); +} + +TEST_F(ArmVIXLAssemblerTest, VixlStoreToOffset) { + __ StoreToOffset(kStoreWord, R2, R4, 12); + __ StoreToOffset(kStoreWord, R2, R4, 0xfff); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreWord, R2, R4, 0x101000); + __ StoreToOffset(kStoreWord, R4, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R2, R4, 12); + __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000); + __ StoreToOffset(kStoreWordPair, R2, R4, 12); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400); + __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400); + + vixl::aarch32::UseScratchRegisterScope temps(assembler.asm_.GetVIXLAssembler()); + temps.Exclude(R12); + __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12. 
+ temps.Include(R12); + __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000); + + __ StoreToOffset(kStoreByte, R2, R4, 12); + + EmitAndCheck(&assembler, "VixlStoreToOffset"); +} + #undef __ +#endif // ENABLE_VIXL_TEST } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 6736015bf1..f8c4008b45 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5012,17 +5012,6 @@ const char* const LoadStoreRegOffsetResults[] = { " 28: f841 0008 str.w r0, [r1, r8]\n", nullptr }; -const char* const LoadStoreLiteralResults[] = { - " 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n", - " 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n", - " 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n", - " a: f84f 0008 str.w r0, [pc, #-8] ; 4 <LoadStoreLiteral+0x4>\n", - " e: 48ff ldr r0, [pc, #1020] ; (40c <LoadStoreLiteral+0x40c>)\n", - " 10: f8df 07ff ldr.w r0, [pc, #2047] ; 813 <LoadStoreLiteral+0x813>\n", - " 14: f8cf 03ff str.w r0, [pc, #1023] ; 417 <LoadStoreLiteral+0x417>\n", - " 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n", - nullptr -}; const char* const LoadStoreLimitsResults[] = { " 0: 6fe0 ldr r0, [r4, #124] ; 0x7c\n", " 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n", @@ -5468,6 +5457,265 @@ const char* const CmpConstantResults[] = { nullptr }; +const char* const VixlJniHelpersResults[] = { + " 0: e92d 4de0 stmdb sp!, {r5, r6, r7, r8, sl, fp, lr}\n", + " 4: ed2d 8a10 vpush {s16-s31}\n", + " 8: b089 sub sp, #36 ; 0x24\n", + " a: 9000 str r0, [sp, #0]\n", + " c: 9121 str r1, [sp, #132] ; 0x84\n", + " e: ed8d 0a22 vstr s0, [sp, #136] ; 0x88\n", + " 12: 9223 str r2, [sp, #140] ; 0x8c\n", + " 14: 9324 str r3, [sp, #144] ; 0x90\n", + " 16: b088 sub sp, #32\n", + " 18: f5ad 5d80 sub.w sp, sp, #4096 ; 0x1000\n", + " 1c: 9808 ldr r0, [sp, #32]\n", + " 1e: 981f ldr r0, [sp, #124] ; 0x7c\n", + " 20: 9821 ldr r0, [sp, #132] ; 0x84\n", + " 22: 98ff ldr r0, [sp, #1020] ; 0x3fc\n", + " 24: f8dd 0400 ldr.w r0, [sp, #1024] ; 0x400\n", + " 28: f8dd cffc ldr.w ip, [sp, #4092] ; 0xffc\n", + " 2c: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", + " 30: f8dc c000 ldr.w ip, [ip]\n", + " 34: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", + " 38: f8dc 0080 ldr.w r0, [ip, #128] ; 0x80\n", + " 3c: 9008 str r0, [sp, #32]\n", + " 3e: 901f str r0, [sp, #124] ; 0x7c\n", + " 40: 9021 str r0, [sp, #132] ; 0x84\n", + " 42: 90ff str r0, [sp, #1020] ; 0x3fc\n", + " 44: f8cd 0400 str.w r0, [sp, #1024] ; 0x400\n", + " 48: f8cd cffc str.w ip, [sp, #4092] ; 0xffc\n", + " 4c: f84d 5d04 str.w r5, [sp, #-4]!\n", + " 50: f50d 5580 add.w r5, sp, #4096 ; 0x1000\n", + " 54: f8c5 c004 str.w ip, [r5, #4]\n", + " 58: f85d 5b04 ldr.w r5, [sp], #4\n", + " 5c: f04f 0cff mov.w ip, #255 ; 0xff\n", + " 60: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 64: f06f 4c7f mvn.w ip, #4278190080 ; 0xff000000\n", + " 68: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 6c: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 70: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 74: 900c str r0, [sp, #48] ; 0x30\n", + " 76: f8dd c030 ldr.w ip, [sp, #48] ; 0x30\n", + " 7a: f8cd c034 str.w ip, [sp, #52] ; 0x34\n", + " 7e: f50d 5c80 add.w ip, sp, #4096 ; 0x1000\n", + " 82: f8c9 c200 str.w ip, [r9, #512] ; 0x200\n", + " 86: f8c9 d200 str.w sp, [r9, #512] ; 0x200\n", + " 8a: f8d0 c030 ldr.w ip, [r0, #48] ; 0x30\n", + " 8e: 47e0 blx ip\n", + " 90: f8dd c02c ldr.w ip, [sp, 
#44] ; 0x2c\n", + " 94: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " 98: f8d9 c200 ldr.w ip, [r9, #512] ; 0x200\n", + " 9c: f8cd c02c str.w ip, [sp, #44] ; 0x2c\n", + " a0: f8dd c02c ldr.w ip, [sp, #44] ; 0x2c\n", + " a4: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " a8: 4648 mov r0, r9\n", + " aa: f8cd 9030 str.w r9, [sp, #48] ; 0x30\n", + " ae: 4684 mov ip, r0\n", + " b0: f1bc 0f00 cmp.w ip, #0\n", + " b4: bf18 it ne\n", + " b6: f10d 0c30 addne.w ip, sp, #48 ; 0x30\n", + " ba: f10d 0c30 add.w ip, sp, #48 ; 0x30\n", + " be: f1bc 0f00 cmp.w ip, #0\n", + " c2: bf0c ite eq\n", + " c4: 2000 moveq r0, #0\n", + " c6: a80c addne r0, sp, #48 ; 0x30\n", + " c8: f8dd c040 ldr.w ip, [sp, #64] ; 0x40\n", + " cc: f1bc 0f00 cmp.w ip, #0\n", + " d0: bf18 it ne\n", + " d2: f10d 0c40 addne.w ip, sp, #64 ; 0x40\n", + " d6: f8cd c030 str.w ip, [sp, #48] ; 0x30\n", + " da: f1bc 0f00 cmp.w ip, #0\n", + " de: bf0c ite eq\n", + " e0: 2000 moveq r0, #0\n", + " e2: 4668 movne r0, sp\n", + " e4: f1bc 0f00 cmp.w ip, #0\n", + " e8: bf0c ite eq\n", + " ea: 2000 moveq r0, #0\n", + " ec: f20d 4001 addwne r0, sp, #1025 ; 0x401\n", + " f0: f1bc 0f00 cmp.w ip, #0\n", + " f4: bf18 it ne\n", + " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", + " fa: f8d9 c084 ldr.w ip, [r9, #132] ; 0x84\n", + " fe: f1bc 0f00 cmp.w ip, #0\n", + " 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n", + " 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 108: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 10c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 110: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 114: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 118: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 11c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 120: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 124: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 128: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 12c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 130: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 134: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 138: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 13c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 140: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 144: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 148: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 14c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 150: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 154: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 158: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 15c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 160: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 164: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 168: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 16c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 170: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 174: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 178: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 17c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 180: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 184: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 188: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 18c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 190: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 194: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 198: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 19c: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1a8: f8cd c7ff 
str.w ip, [sp, #2047] ; 0x7ff\n", + " 1ac: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1b0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1b4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1b8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1bc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1c8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1cc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1d8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1dc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1e0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1e4: f000 b802 b.w 1ec <VixlJniHelpers+0x1ec>\n", + " 1e8: f000 b818 b.w 21c <VixlJniHelpers+0x21c>\n", + " 1ec: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f0: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f4: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1f8: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 1fc: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 200: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 204: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 208: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", + " 20c: f50d 5d80 add.w sp, sp, #4096 ; 0x1000\n", + " 210: b008 add sp, #32\n", + " 212: b009 add sp, #36 ; 0x24\n", + " 214: ecbd 8a10 vpop {s16-s31}\n", + " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", + " 21c: 4660 mov r0, ip\n", + " 21e: f8d9 c2b8 ldr.w ip, [r9, #696] ; 0x2b8\n", + " 222: 47e0 blx ip\n", + nullptr +}; + +const char* const VixlLoadFromOffsetResults[] = { + " 0: 68e2 ldr r2, [r4, #12]\n", + " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " a: 6812 ldr r2, [r2, #0]\n", + " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 14: f44f 5280 mov.w r2, #4096 ; 0x1000\n", + " 18: f2c0 0210 movt r2, #16\n", + " 1c: 4422 add r2, r4\n", + " 1e: 6812 ldr r2, [r2, #0]\n", + " 20: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 24: f2c0 0c10 movt ip, #16\n", + " 28: 4464 add r4, ip\n", + " 2a: 6824 ldr r4, [r4, #0]\n", + " 2c: 89a2 ldrh r2, [r4, #12]\n", + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n", + " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " 36: 8812 ldrh r2, [r2, #0]\n", + " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n", + " 40: f44f 5280 mov.w r2, #4096 ; 0x1000\n", + " 44: f2c0 0210 movt r2, #16\n", + " 48: 4422 add r2, r4\n", + " 4a: 8812 ldrh r2, [r2, #0]\n", + " 4c: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 50: f2c0 0c10 movt ip, #16\n", + " 54: 4464 add r4, ip\n", + " 56: 8824 ldrh r4, [r4, #0]\n", + " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n", + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n", + " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n", + " 64: e9d2 2300 ldrd r2, r3, [r2]\n", + " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n", + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164] ; 0xa4\n", + " 70: f44f 6280 mov.w r2, #1024 ; 0x400\n", + " 74: f2c0 0204 movt r2, #4\n", + " 78: 4422 add r2, r4\n", + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n", + " 7e: f44f 6c80 mov.w ip, #1024 ; 0x400\n", + " 82: f2c0 0c04 movt ip, #4\n", + " 86: 4464 add r4, ip\n", + " 88: e9d4 4500 ldrd r4, r5, [r4]\n", + " 8c: f8dc 000c ldr.w r0, [ip, #12]\n", + " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n", + " 94: f8d2 20a4 ldr.w r2, 
[r2, #164] ; 0xa4\n", + " 98: f994 200c ldrsb.w r2, [r4, #12]\n", + " 9c: 7b22 ldrb r2, [r4, #12]\n", + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", + nullptr +}; +const char* const VixlStoreToOffsetResults[] = { + " 0: 60e2 str r2, [r4, #12]\n", + " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " a: f8cc 2000 str.w r2, [ip]\n", + " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " 16: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 1a: f2c0 0c10 movt ip, #16\n", + " 1e: 44a4 add ip, r4\n", + " 20: f8cc 2000 str.w r2, [ip]\n", + " 24: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 28: f2c0 0c10 movt ip, #16\n", + " 2c: 44a4 add ip, r4\n", + " 2e: f8cc 4000 str.w r4, [ip]\n", + " 32: 81a2 strh r2, [r4, #12]\n", + " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n", + " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " 3c: f8ac 2000 strh.w r2, [ip]\n", + " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n", + " 48: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 4c: f2c0 0c10 movt ip, #16\n", + " 50: 44a4 add ip, r4\n", + " 52: f8ac 2000 strh.w r2, [ip]\n", + " 56: f44f 5c80 mov.w ip, #4096 ; 0x1000\n", + " 5a: f2c0 0c10 movt ip, #16\n", + " 5e: 44a4 add ip, r4\n", + " 60: f8ac 4000 strh.w r4, [ip]\n", + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n", + " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n", + " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n", + " 70: e9cc 2300 strd r2, r3, [ip]\n", + " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n", + " 78: e9cc 2329 strd r2, r3, [ip, #164] ; 0xa4\n", + " 7c: f44f 6c80 mov.w ip, #1024 ; 0x400\n", + " 80: f2c0 0c04 movt ip, #4\n", + " 84: 44a4 add ip, r4\n", + " 86: e9cc 2300 strd r2, r3, [ip]\n", + " 8a: f44f 6c80 mov.w ip, #1024 ; 0x400\n", + " 8e: f2c0 0c04 movt ip, #4\n", + " 92: 44a4 add ip, r4\n", + " 94: e9cc 4500 strd r4, r5, [ip]\n", + " 98: f8cc 000c str.w r0, [ip, #12]\n", + " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n", + " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " a4: 7322 strb r2, [r4, #12]\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5515,9 +5763,11 @@ void setup_results() { test_results["MixedBranch32"] = MixedBranch32Results; test_results["Shifts"] = ShiftsResults; test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults; - test_results["LoadStoreLiteral"] = LoadStoreLiteralResults; test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; test_results["AddConstant"] = AddConstantResults; test_results["CmpConstant"] = CmpConstantResults; + test_results["VixlJniHelpers"] = VixlJniHelpersResults; + test_results["VixlStoreToOffset"] = VixlStoreToOffsetResults; + test_results["VixlLoadFromOffset"] = VixlLoadFromOffsetResults; } diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h new file mode 100644 index 0000000000..ad3a099eb6 --- /dev/null +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_ +#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_ + +#include "atomic_method_ref_map.h" + +#include "dex_file-inl.h" + +namespace art { + +template <typename T> +inline typename AtomicMethodRefMap<T>::InsertResult AtomicMethodRefMap<T>::Insert( + MethodReference ref, + const T& expected, + const T& desired) { + ElementArray* const array = GetArray(ref.dex_file); + if (array == nullptr) { + return kInsertResultInvalidDexFile; + } + return (*array)[ref.dex_method_index].CompareExchangeStrongSequentiallyConsistent( + expected, desired) + ? kInsertResultSuccess + : kInsertResultCASFailure; +} + +template <typename T> +inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const { + const ElementArray* const array = GetArray(ref.dex_file); + if (array == nullptr) { + return false; + } + *out = (*array)[ref.dex_method_index].LoadRelaxed(); + return true; +} + +template <typename T> +inline void AtomicMethodRefMap<T>::AddDexFile(const DexFile* dex_file) { + arrays_.Put(dex_file, std::move(ElementArray(dex_file->NumMethodIds()))); +} + +template <typename T> +inline typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray( + const DexFile* dex_file) { + auto it = arrays_.find(dex_file); + return (it != arrays_.end()) ? &it->second : nullptr; +} + +template <typename T> +inline const typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray( + const DexFile* dex_file) const { + auto it = arrays_.find(dex_file); + return (it != arrays_.end()) ? &it->second : nullptr; +} + +template <typename T> template <typename Visitor> +inline void AtomicMethodRefMap<T>::Visit(const Visitor& visitor) { + for (auto& pair : arrays_) { + const DexFile* dex_file = pair.first; + const ElementArray& elements = pair.second; + for (size_t i = 0; i < elements.size(); ++i) { + visitor(MethodReference(dex_file, i), elements[i].LoadRelaxed()); + } + } +} + +template <typename T> +inline void AtomicMethodRefMap<T>::ClearEntries() { + for (auto& it : arrays_) { + for (auto& element : it.second) { + element.StoreRelaxed(nullptr); + } + } +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_ diff --git a/compiler/utils/atomic_method_ref_map.h b/compiler/utils/atomic_method_ref_map.h new file mode 100644 index 0000000000..fed848f563 --- /dev/null +++ b/compiler/utils/atomic_method_ref_map.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
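[Editor's note — usage sketch, not part of the diff.] The inline implementation above gives AtomicMethodRefMap its lock-free behaviour: AddDexFile() sizes one Atomic<T> slot per method id, and Insert() is a compare-and-swap against the slot's current value. A minimal usage sketch, assuming an already-opened dex file (the full interface is declared in atomic_method_ref_map.h, whose header continues below, and atomic_method_ref_map_test.cc exercises the same flow):

#include "atomic_method_ref_map-inl.h"
#include "method_reference.h"

namespace art {

// Illustration only: per-method slots hold T() after AddDexFile(); Insert()
// succeeds only if the slot still holds the expected value.
void RecordValue(const DexFile* dex_file, uint32_t method_idx) {
  AtomicMethodRefMap<int> map;
  map.AddDexFile(dex_file);  // Must precede Insert()/Get() for this dex file.
  MethodReference ref(dex_file, method_idx);
  if (map.Insert(ref, /* expected */ 0, /* desired */ 1) ==
      AtomicMethodRefMap<int>::kInsertResultSuccess) {
    int value = 0;
    map.Get(ref, &value);  // value is now 1.
  }
}

}  // namespace art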
+ */ + +#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_ +#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_ + +#include "base/dchecked_vector.h" +#include "method_reference.h" +#include "safe_map.h" + +namespace art { + +class DexFile; + +// Used by CompilerCallbacks to track verification information from the Runtime. +template <typename T> +class AtomicMethodRefMap { + public: + explicit AtomicMethodRefMap() {} + ~AtomicMethodRefMap() {} + + // Atomically swap the element in if the existing value matches expected. + enum InsertResult { + kInsertResultInvalidDexFile, + kInsertResultCASFailure, + kInsertResultSuccess, + }; + InsertResult Insert(MethodReference ref, const T& expected, const T& desired); + + // Retreive an item, returns false if the dex file is not added. + bool Get(MethodReference ref, T* out) const; + + // Dex files must be added before method references belonging to them can be used as keys. Not + // thread safe. + void AddDexFile(const DexFile* dex_file); + + bool HaveDexFile(const DexFile* dex_file) const { + return arrays_.find(dex_file) != arrays_.end(); + } + + // Visit all of the dex files and elements. + template <typename Visitor> + void Visit(const Visitor& visitor); + + void ClearEntries(); + + private: + // Verified methods. The method array is fixed to avoid needing a lock to extend it. + using ElementArray = dchecked_vector<Atomic<T>>; + using DexFileArrays = SafeMap<const DexFile*, ElementArray>; + + const ElementArray* GetArray(const DexFile* dex_file) const; + ElementArray* GetArray(const DexFile* dex_file); + + DexFileArrays arrays_; +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_ diff --git a/compiler/utils/atomic_method_ref_map_test.cc b/compiler/utils/atomic_method_ref_map_test.cc new file mode 100644 index 0000000000..9e5bf4bbe1 --- /dev/null +++ b/compiler/utils/atomic_method_ref_map_test.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "atomic_method_ref_map-inl.h" + +#include <memory> + +#include "common_runtime_test.h" +#include "dex_file-inl.h" +#include "method_reference.h" +#include "scoped_thread_state_change-inl.h" + +namespace art { + +class AtomicMethodRefMapTest : public CommonRuntimeTest {}; + +TEST_F(AtomicMethodRefMapTest, RunTests) { + ScopedObjectAccess soa(Thread::Current()); + std::unique_ptr<const DexFile> dex(OpenTestDexFile("Interfaces")); + ASSERT_TRUE(dex != nullptr); + using Map = AtomicMethodRefMap<int>; + Map map; + int value = 123; + // Error case: Not already inserted. + EXPECT_FALSE(map.Get(MethodReference(dex.get(), 1), &value)); + EXPECT_FALSE(map.HaveDexFile(dex.get())); + // Error case: Dex file not registered. 
+ EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, 1) == Map::kInsertResultInvalidDexFile); + map.AddDexFile(dex.get()); + EXPECT_TRUE(map.HaveDexFile(dex.get())); + EXPECT_GT(dex->NumMethodIds(), 10u); + // After we have added the get should succeed but return the default value. + EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value)); + EXPECT_EQ(value, 0); + // Actually insert an item and make sure we can retreive it. + static const int kInsertValue = 44; + EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue) == + Map::kInsertResultSuccess); + EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value)); + EXPECT_EQ(value, kInsertValue); + static const int kInsertValue2 = 123; + EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 2), 0, kInsertValue2) == + Map::kInsertResultSuccess); + EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value)); + EXPECT_EQ(value, kInsertValue); + EXPECT_TRUE(map.Get(MethodReference(dex.get(), 2), &value)); + EXPECT_EQ(value, kInsertValue2); + // Error case: Incorrect expected value for CAS. + EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue + 1) == + Map::kInsertResultCASFailure); + // Correctly overwrite the value and verify. + EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), kInsertValue, kInsertValue + 1) == + Map::kInsertResultSuccess); + EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value)); + EXPECT_EQ(value, kInsertValue + 1); +} + +} // namespace art diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h index ac5481336b..c06e9cadcc 100644 --- a/compiler/utils/dedupe_set-inl.h +++ b/compiler/utils/dedupe_set-inl.h @@ -23,10 +23,11 @@ #include <inttypes.h> #include <unordered_map> +#include "android-base/stringprintf.h" + #include "base/mutex.h" #include "base/hash_set.h" #include "base/stl_util.h" -#include "base/stringprintf.h" #include "base/time_utils.h" namespace art { @@ -238,13 +239,13 @@ std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpS for (HashType shard = 0; shard < kShard; ++shard) { shards_[shard]->UpdateStats(self, &stats); } - return StringPrintf("%zu collisions, %zu max hash collisions, " - "%zu/%zu probe distance, %" PRIu64 " ns hash time", - stats.collision_sum, - stats.collision_max, - stats.total_probe_distance, - stats.total_size, - hash_time_); + return android::base::StringPrintf("%zu collisions, %zu max hash collisions, " + "%zu/%zu probe distance, %" PRIu64 " ns hash time", + stats.collision_sum, + stats.collision_max, + stats.total_probe_distance, + stats.total_size, + hash_time_); } diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc index 60a891d6a2..4c0979e0b7 100644 --- a/compiler/utils/dedupe_set_test.cc +++ b/compiler/utils/dedupe_set_test.cc @@ -20,10 +20,10 @@ #include <cstdio> #include <vector> +#include "base/array_ref.h" #include "dedupe_set-inl.h" #include "gtest/gtest.h" #include "thread-inl.h" -#include "utils/array_ref.h" namespace art { diff --git a/compiler/utils/intrusive_forward_list.h b/compiler/utils/intrusive_forward_list.h index ec2c08722c..b5fc2f2456 100644 --- a/compiler/utils/intrusive_forward_list.h +++ b/compiler/utils/intrusive_forward_list.h @@ -59,7 +59,7 @@ class IntrusiveForwardListIterator : public std::iterator<std::forward_iterator_ // Conversion from iterator to const_iterator. 
template <typename OtherT, typename = typename std::enable_if<std::is_same<T, const OtherT>::value>::type> - IntrusiveForwardListIterator(const IntrusiveForwardListIterator<OtherT, HookTraits>& src) + IntrusiveForwardListIterator(const IntrusiveForwardListIterator<OtherT, HookTraits>& src) // NOLINT, implicit : hook_(src.hook_) { } // Iteration. diff --git a/compiler/utils/intrusive_forward_list_test.cc b/compiler/utils/intrusive_forward_list_test.cc index 517142e1b5..f2efa4dd15 100644 --- a/compiler/utils/intrusive_forward_list_test.cc +++ b/compiler/utils/intrusive_forward_list_test.cc @@ -39,12 +39,12 @@ bool operator<(const IFLTestValue& lhs, const IFLTestValue& rhs) { return lhs.value < rhs.value; } -#define ASSERT_LISTS_EQUAL(expected, value) \ - do { \ - ASSERT_EQ(expected.empty(), value.empty()); \ - ASSERT_EQ(std::distance(expected.begin(), expected.end()), \ - std::distance(value.begin(), value.end())); \ - ASSERT_TRUE(std::equal(expected.begin(), expected.end(), value.begin())); \ +#define ASSERT_LISTS_EQUAL(expected, value) \ + do { \ + ASSERT_EQ((expected).empty(), (value).empty()); \ + ASSERT_EQ(std::distance((expected).begin(), (expected).end()), \ + std::distance((value).begin(), (value).end())); \ + ASSERT_TRUE(std::equal((expected).begin(), (expected).end(), (value).begin())); \ } while (false) TEST(IntrusiveForwardList, IteratorToConstIterator) { diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc new file mode 100644 index 0000000000..3ac6c3ca7a --- /dev/null +++ b/compiler/utils/jni_macro_assembler.cc @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "jni_macro_assembler.h" + +#include <algorithm> +#include <vector> + +#ifdef ART_ENABLE_CODEGEN_arm +#include "arm/jni_macro_assembler_arm_vixl.h" +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "arm64/jni_macro_assembler_arm64.h" +#endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "mips/assembler_mips.h" +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 +#include "mips64/assembler_mips64.h" +#endif +#ifdef ART_ENABLE_CODEGEN_x86 +#include "x86/jni_macro_assembler_x86.h" +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "x86_64/jni_macro_assembler_x86_64.h" +#endif +#include "base/casts.h" +#include "globals.h" +#include "memory_region.h" + +namespace art { + +using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>; + +template <> +MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create( + ArenaAllocator* arena, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features) { +#ifndef ART_ENABLE_CODEGEN_mips + UNUSED(instruction_set_features); +#endif + + switch (instruction_set) { +#ifdef ART_ENABLE_CODEGEN_arm + case kArm: + case kThumb2: + return MacroAsm32UniquePtr(new (arena) arm::ArmVIXLJNIMacroAssembler(arena)); +#endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: + return MacroAsm32UniquePtr(new (arena) mips::MipsAssembler( + arena, + instruction_set_features != nullptr + ? instruction_set_features->AsMipsInstructionSetFeatures() + : nullptr)); +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + case kX86: + return MacroAsm32UniquePtr(new (arena) x86::X86JNIMacroAssembler(arena)); +#endif + default: + LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set; + UNREACHABLE(); + } +} + +using MacroAsm64UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k64>>; + +template <> +MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( + ArenaAllocator* arena, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features) { +#ifndef ART_ENABLE_CODEGEN_mips64 + UNUSED(instruction_set_features); +#endif + + switch (instruction_set) { +#ifdef ART_ENABLE_CODEGEN_arm64 + case kArm64: + return MacroAsm64UniquePtr(new (arena) arm64::Arm64JNIMacroAssembler(arena)); +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 + case kMips64: + return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler( + arena, + instruction_set_features != nullptr + ? instruction_set_features->AsMips64InstructionSetFeatures() + : nullptr)); +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case kX86_64: + return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena)); +#endif + default: + UNUSED(arena); + LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set; + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h new file mode 100644 index 0000000000..59a1a48e20 --- /dev/null +++ b/compiler/utils/jni_macro_assembler.h @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
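[Editor's note — caller-side sketch, not part of the diff.] The two Create() specializations above are how client code obtains a concrete JNI macro assembler; everything else is programmed against the JNIMacroAssembler<PointerSize> interface declared in the header whose license block continues below. A hedged sketch of a 32-bit caller (the exact include paths and error handling are simplified; which backend comes back depends on the ART_ENABLE_CODEGEN_* macros seen above):

#include <memory>

#include "base/arena_allocator.h"
#include "jni_macro_assembler.h"

namespace art {

// Illustration only: obtain a 32-bit (Thumb2) JNI macro assembler from the
// factory above and emit a trivial frame-size adjustment.
void EmitSomething(ArenaAllocator* arena) {
  std::unique_ptr<JNIMacroAssembler<PointerSize::k32>> jni_asm =
      JNIMacroAssembler<PointerSize::k32>::Create(arena, kThumb2);
  jni_asm->IncreaseFrameSize(32);
  jni_asm->DecreaseFrameSize(32);
  jni_asm->FinalizeCode();
}

}  // namespace art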
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_ +#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_ + +#include <vector> + +#include "arch/instruction_set.h" +#include "base/arena_allocator.h" +#include "base/arena_object.h" +#include "base/array_ref.h" +#include "base/enums.h" +#include "base/logging.h" +#include "base/macros.h" +#include "managed_register.h" +#include "offsets.h" + +namespace art { + +class ArenaAllocator; +class DebugFrameOpCodeWriterForAssembler; +class InstructionSetFeatures; +class MemoryRegion; +class JNIMacroLabel; + +enum class JNIMacroUnaryCondition { + kZero, + kNotZero +}; + +template <PointerSize kPointerSize> +class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { + public: + static std::unique_ptr<JNIMacroAssembler<kPointerSize>> Create( + ArenaAllocator* arena, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features = nullptr); + + // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. + virtual void FinalizeCode() = 0; + + // Size of generated code + virtual size_t CodeSize() const = 0; + + // Copy instructions out of assembly buffer into the given region of memory + virtual void FinalizeInstructions(const MemoryRegion& region) = 0; + + // Emit code that will create an activation on the stack + virtual void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) = 0; + + // Emit code that will remove an activation from the stack + virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0; + + virtual void IncreaseFrameSize(size_t adjust) = 0; + virtual void DecreaseFrameSize(size_t adjust) = 0; + + // Store routines + virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0; + virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0; + virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0; + + virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0; + + virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) = 0; + + virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0; + + virtual void StoreSpanning(FrameOffset dest, + ManagedRegister src, + FrameOffset in_off, + ManagedRegister scratch) = 0; + + // Load routines + virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; + + virtual void LoadFromThread(ManagedRegister dest, + ThreadOffset<kPointerSize> src, + size_t size) = 0; + + virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0; + // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference. 
+ virtual void LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) = 0; + + virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0; + + virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0; + + // Copying routines + virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0; + + virtual void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset<kPointerSize> thr_offs, + ManagedRegister scratch) = 0; + + virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) = 0; + + virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0; + + virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0; + + virtual void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) = 0; + + virtual void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) = 0; + + virtual void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) = 0; + + virtual void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) = 0; + + virtual void Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister scratch, + size_t size) = 0; + + virtual void MemoryBarrier(ManagedRegister scratch) = 0; + + // Sign extension + virtual void SignExtend(ManagedRegister mreg, size_t size) = 0; + + // Zero extension + virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0; + + // Exploit fast access in managed code to Thread::Current() + virtual void GetCurrentThread(ManagedRegister tr) = 0; + virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + virtual void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) = 0; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + virtual void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister scratch, + bool null_allowed) = 0; + + // src holds a handle scope entry (Object**) load this into dst + virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0; + virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0; + + // Call to address held at [base+offset] + virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0; + virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0; + virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. 
+ virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0; + + // Create a new label that can be used with Jump/Bind calls. + virtual std::unique_ptr<JNIMacroLabel> CreateLabel() = 0; + // Emit an unconditional jump to the label. + virtual void Jump(JNIMacroLabel* label) = 0; + // Emit a conditional jump to the label by applying a unary condition test to the register. + virtual void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) = 0; + // Code at this offset will serve as the target for the Jump call. + virtual void Bind(JNIMacroLabel* label) = 0; + + virtual ~JNIMacroAssembler() {} + + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0; + + protected: + explicit JNIMacroAssembler() {} +}; + +// A "Label" class used with the JNIMacroAssembler +// allowing one to use branches (jumping from one place to another). +// +// This is just an interface, so every platform must provide +// its own implementation of it. +// +// It is only safe to use a label created +// via JNIMacroAssembler::CreateLabel with that same macro assembler. +class JNIMacroLabel { + public: + virtual ~JNIMacroLabel() = 0; + + const InstructionSet isa_; + protected: + explicit JNIMacroLabel(InstructionSet isa) : isa_(isa) {} +}; + +inline JNIMacroLabel::~JNIMacroLabel() { + // Compulsory definition for a pure virtual destructor + // to avoid linking errors. +} + +template <typename T, PointerSize kPointerSize> +class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> { + public: + void FinalizeCode() OVERRIDE { + asm_.FinalizeCode(); + } + + size_t CodeSize() const OVERRIDE { + return asm_.CodeSize(); + } + + void FinalizeInstructions(const MemoryRegion& region) OVERRIDE { + asm_.FinalizeInstructions(region); + } + + DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE { + return asm_.cfi(); + } + + protected: + explicit JNIMacroAssemblerFwd(ArenaAllocator* arena) : asm_(arena) {} + + T asm_; +}; + +template <typename Self, typename PlatformLabel, InstructionSet kIsa> +class JNIMacroLabelCommon : public JNIMacroLabel { + public: + static Self* Cast(JNIMacroLabel* label) { + CHECK(label != nullptr); + CHECK_EQ(kIsa, label->isa_); + + return reinterpret_cast<Self*>(label); + } + + protected: + PlatformLabel* AsPlatformLabel() { + return &label_; + } + + JNIMacroLabelCommon() : JNIMacroLabel(kIsa) { + } + + virtual ~JNIMacroLabelCommon() OVERRIDE {} + + private: + PlatformLabel label_; +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_ diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h new file mode 100644 index 0000000000..293f4cde9c --- /dev/null +++ b/compiler/utils/jni_macro_assembler_test.h @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
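[Editor's note — usage sketch, not part of the diff.] Going back to the label interface declared above (CreateLabel/Jump/Bind with JNIMacroUnaryCondition), a minimal sketch of the intended pattern; jni_asm is assumed to be a JNIMacroAssembler obtained from Create(), and reg a ManagedRegister holding the value being tested:

#include <memory>

#include "jni_macro_assembler.h"

namespace art {

// Illustration only: branch around two instructions when 'reg' is zero. Labels
// are only safe to use with the assembler that created them.
template <PointerSize kPointerSize>
void SkipIfZero(JNIMacroAssembler<kPointerSize>* jni_asm, ManagedRegister reg) {
  std::unique_ptr<JNIMacroLabel> skip = jni_asm->CreateLabel();
  jni_asm->Jump(skip.get(), JNIMacroUnaryCondition::kZero, reg);
  jni_asm->IncreaseFrameSize(16);   // Only reached when reg != 0.
  jni_asm->DecreaseFrameSize(16);
  jni_asm->Bind(skip.get());        // Execution rejoins here on both paths.
}

}  // namespace art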
+ */ + +#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_ +#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_ + +#include "jni_macro_assembler.h" + +#include "assembler_test_base.h" +#include "common_runtime_test.h" // For ScratchFile + +#include <cstdio> +#include <cstdlib> +#include <fstream> +#include <iterator> +#include <sys/stat.h> + +namespace art { + +template<typename Ass> +class JNIMacroAssemblerTest : public testing::Test { + public: + Ass* GetAssembler() { + return assembler_.get(); + } + + typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler); + + void DriverFn(TestFn f, const std::string& test_name) { + DriverWrapper(f(this, assembler_.get()), test_name); + } + + // This driver assumes the assembler has already been called. + void DriverStr(const std::string& assembly_string, const std::string& test_name) { + DriverWrapper(assembly_string, test_name); + } + + // This is intended to be run as a test. + bool CheckTools() { + return test_helper_->CheckTools(); + } + + protected: + explicit JNIMacroAssemblerTest() {} + + void SetUp() OVERRIDE { + arena_.reset(new ArenaAllocator(&pool_)); + assembler_.reset(CreateAssembler(arena_.get())); + test_helper_.reset( + new AssemblerTestInfrastructure(GetArchitectureString(), + GetAssemblerCmdName(), + GetAssemblerParameters(), + GetObjdumpCmdName(), + GetObjdumpParameters(), + GetDisassembleCmdName(), + GetDisassembleParameters(), + GetAssemblyHeader())); + + SetUpHelpers(); + } + + void TearDown() OVERRIDE { + test_helper_.reset(); // Clean up the helper. + assembler_.reset(); + arena_.reset(); + } + + // Override this to set up any architecture-specific things, e.g., CPU revision. + virtual Ass* CreateAssembler(ArenaAllocator* arena) { + return new (arena) Ass(arena); + } + + // Override this to set up any architecture-specific things, e.g., register vectors. + virtual void SetUpHelpers() {} + + // Get the typically used name for this architecture, e.g., aarch64, x86_64, ... + virtual std::string GetArchitectureString() = 0; + + // Get the name of the assembler, e.g., "as" by default. + virtual std::string GetAssemblerCmdName() { + return "as"; + } + + // Switches to the assembler command. Default none. + virtual std::string GetAssemblerParameters() { + return ""; + } + + // Get the name of the objdump, e.g., "objdump" by default. + virtual std::string GetObjdumpCmdName() { + return "objdump"; + } + + // Switches to the objdump command. Default is " -h". + virtual std::string GetObjdumpParameters() { + return " -h"; + } + + // Get the name of the objdump, e.g., "objdump" by default. + virtual std::string GetDisassembleCmdName() { + return "objdump"; + } + + // Switches to the objdump command. As it's a binary, one needs to push the architecture and + // such to objdump, so it's architecture-specific and there is no default. + virtual std::string GetDisassembleParameters() = 0; + + // If the assembly file needs a header, return it in a sub-class. + virtual const char* GetAssemblyHeader() { + return nullptr; + } + + private: + // Override this to pad the code with NOPs to a certain size if needed. 
+ virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + + void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { + assembler_->FinalizeCode(); + size_t cs = assembler_->CodeSize(); + std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); + MemoryRegion code(&(*data)[0], data->size()); + assembler_->FinalizeInstructions(code); + Pad(*data); + test_helper_->Driver(*data, assembly_text, test_name); + } + + ArenaPool pool_; + std::unique_ptr<ArenaAllocator> arena_; + std::unique_ptr<Ass> assembler_; + std::unique_ptr<AssemblerTestInfrastructure> test_helper_; + + DISALLOW_COPY_AND_ASSIGN(JNIMacroAssemblerTest); +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_ diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 1038f44ffe..0f82ad5ff1 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -28,7 +28,6 @@ class AssemblerFixup; namespace arm { class ArmAssembler; - class Arm32Assembler; class Thumb2Assembler; } namespace arm64 { @@ -118,7 +117,6 @@ class Label { } friend class arm::ArmAssembler; - friend class arm::Arm32Assembler; friend class arm::Thumb2Assembler; friend class arm64::Arm64Assembler; friend class mips::MipsAssembler; diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index 893daff719..184cdf5050 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -17,8 +17,11 @@ #ifndef ART_COMPILER_UTILS_MANAGED_REGISTER_H_ #define ART_COMPILER_UTILS_MANAGED_REGISTER_H_ +#include <type_traits> #include <vector> +#include "base/value_object.h" + namespace art { namespace arm { @@ -42,49 +45,49 @@ namespace x86_64 { class X86_64ManagedRegister; } -class ManagedRegister { +class ManagedRegister : public ValueObject { public: // ManagedRegister is a value class. There exists no method to change the // internal state. We therefore allow a copy constructor and an // assignment-operator. - ManagedRegister(const ManagedRegister& other) : id_(other.id_) { } + constexpr ManagedRegister(const ManagedRegister& other) = default; - ManagedRegister& operator=(const ManagedRegister& other) { - id_ = other.id_; - return *this; - } + ManagedRegister& operator=(const ManagedRegister& other) = default; - arm::ArmManagedRegister AsArm() const; - arm64::Arm64ManagedRegister AsArm64() const; - mips::MipsManagedRegister AsMips() const; - mips64::Mips64ManagedRegister AsMips64() const; - x86::X86ManagedRegister AsX86() const; - x86_64::X86_64ManagedRegister AsX86_64() const; + constexpr arm::ArmManagedRegister AsArm() const; + constexpr arm64::Arm64ManagedRegister AsArm64() const; + constexpr mips::MipsManagedRegister AsMips() const; + constexpr mips64::Mips64ManagedRegister AsMips64() const; + constexpr x86::X86ManagedRegister AsX86() const; + constexpr x86_64::X86_64ManagedRegister AsX86_64() const; // It is valid to invoke Equals on and with a NoRegister. 
- bool Equals(const ManagedRegister& other) const { + constexpr bool Equals(const ManagedRegister& other) const { return id_ == other.id_; } - bool IsNoRegister() const { + constexpr bool IsNoRegister() const { return id_ == kNoRegister; } - static ManagedRegister NoRegister() { + static constexpr ManagedRegister NoRegister() { return ManagedRegister(); } - int RegId() const { return id_; } - explicit ManagedRegister(int reg_id) : id_(reg_id) { } + constexpr int RegId() const { return id_; } + explicit constexpr ManagedRegister(int reg_id) : id_(reg_id) { } protected: static const int kNoRegister = -1; - ManagedRegister() : id_(kNoRegister) { } + constexpr ManagedRegister() : id_(kNoRegister) { } int id_; }; +static_assert(std::is_trivially_copyable<ManagedRegister>::value, + "ManagedRegister should be trivially copyable"); + class ManagedRegisterSpill : public ManagedRegister { public: // ManagedRegisterSpill contains information about data type size and location in caller frame @@ -115,18 +118,18 @@ class ManagedRegisterEntrySpills : public std::vector<ManagedRegisterSpill> { public: // The ManagedRegister does not have information about size and offset. // In this case it's size and offset determined by BuildFrame (assembler) - void push_back(ManagedRegister __x) { - ManagedRegisterSpill spill(__x); + void push_back(ManagedRegister x) { + ManagedRegisterSpill spill(x); std::vector<ManagedRegisterSpill>::push_back(spill); } - void push_back(ManagedRegister __x, int32_t __size) { - ManagedRegisterSpill spill(__x, __size); + void push_back(ManagedRegister x, int32_t size) { + ManagedRegisterSpill spill(x, size); std::vector<ManagedRegisterSpill>::push_back(spill); } - void push_back(ManagedRegisterSpill __x) { - std::vector<ManagedRegisterSpill>::push_back(__x); + void push_back(ManagedRegisterSpill x) { + std::vector<ManagedRegisterSpill>::push_back(x); } private: }; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index a1798c0f70..a99d02d4d0 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -26,6 +26,11 @@ namespace art { namespace mips { +static_assert(static_cast<size_t>(kMipsPointerSize) == kMipsWordSize, + "Unexpected Mips pointer size."); +static_assert(kMipsPointerSize == PointerSize::k32, "Unexpected Mips pointer size."); + + std::ostream& operator<<(std::ostream& os, const DRegister& rhs) { if (rhs >= D0 && rhs < kNumberOfDRegisters) { os << "d" << static_cast<int>(rhs); @@ -35,16 +40,211 @@ std::ostream& operator<<(std::ostream& os, const DRegister& rhs) { return os; } +MipsAssembler::DelaySlot::DelaySlot() + : instruction_(0), + gpr_outs_mask_(0), + gpr_ins_mask_(0), + fpr_outs_mask_(0), + fpr_ins_mask_(0), + cc_outs_mask_(0), + cc_ins_mask_(0) {} + +void MipsAssembler::DsFsmInstr(uint32_t instruction, + uint32_t gpr_outs_mask, + uint32_t gpr_ins_mask, + uint32_t fpr_outs_mask, + uint32_t fpr_ins_mask, + uint32_t cc_outs_mask, + uint32_t cc_ins_mask) { + if (!reordering_) { + CHECK_EQ(ds_fsm_state_, kExpectingLabel); + CHECK_EQ(delay_slot_.instruction_, 0u); + return; + } + switch (ds_fsm_state_) { + case kExpectingLabel: + break; + case kExpectingInstruction: + CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size()); + // If the last instruction is not suitable for delay slots, drop + // the PC of the label preceding it so that no unconditional branch + // uses this instruction to fill its delay slot. 
+ if (instruction == 0) { + DsFsmDropLabel(); // Sets ds_fsm_state_ = kExpectingLabel. + } else { + // Otherwise wait for another instruction or label before we can + // commit the label PC. The label PC will be dropped if instead + // of another instruction or label there's a call from the code + // generator to CodePosition() to record the buffer size. + // Instructions after which the buffer size is recorded cannot + // be moved into delay slots or anywhere else because they may + // trigger signals and the signal handlers expect these signals + // to be coming from the instructions immediately preceding the + // recorded buffer locations. + ds_fsm_state_ = kExpectingCommit; + } + break; + case kExpectingCommit: + CHECK_EQ(ds_fsm_target_pc_ + 2 * sizeof(uint32_t), buffer_.Size()); + DsFsmCommitLabel(); // Sets ds_fsm_state_ = kExpectingLabel. + break; + } + delay_slot_.instruction_ = instruction; + delay_slot_.gpr_outs_mask_ = gpr_outs_mask & ~1u; // Ignore register ZERO. + delay_slot_.gpr_ins_mask_ = gpr_ins_mask & ~1u; // Ignore register ZERO. + delay_slot_.fpr_outs_mask_ = fpr_outs_mask; + delay_slot_.fpr_ins_mask_ = fpr_ins_mask; + delay_slot_.cc_outs_mask_ = cc_outs_mask; + delay_slot_.cc_ins_mask_ = cc_ins_mask; +} + +void MipsAssembler::DsFsmLabel() { + if (!reordering_) { + CHECK_EQ(ds_fsm_state_, kExpectingLabel); + CHECK_EQ(delay_slot_.instruction_, 0u); + return; + } + switch (ds_fsm_state_) { + case kExpectingLabel: + ds_fsm_target_pc_ = buffer_.Size(); + ds_fsm_state_ = kExpectingInstruction; + break; + case kExpectingInstruction: + // Allow consecutive labels. + CHECK_EQ(ds_fsm_target_pc_, buffer_.Size()); + break; + case kExpectingCommit: + CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size()); + DsFsmCommitLabel(); + ds_fsm_target_pc_ = buffer_.Size(); + ds_fsm_state_ = kExpectingInstruction; + break; + } + // We cannot move instructions into delay slots across labels. + delay_slot_.instruction_ = 0; +} + +void MipsAssembler::DsFsmCommitLabel() { + if (ds_fsm_state_ == kExpectingCommit) { + ds_fsm_target_pcs_.emplace_back(ds_fsm_target_pc_); + } + ds_fsm_state_ = kExpectingLabel; +} + +void MipsAssembler::DsFsmDropLabel() { + ds_fsm_state_ = kExpectingLabel; +} + +bool MipsAssembler::SetReorder(bool enable) { + bool last_state = reordering_; + if (last_state != enable) { + DsFsmCommitLabel(); + DsFsmInstrNop(0); + } + reordering_ = enable; + return last_state; +} + +size_t MipsAssembler::CodePosition() { + // The last instruction cannot be used in a delay slot, do not commit + // the label before it (if any) and clear the delay slot. + DsFsmDropLabel(); + DsFsmInstrNop(0); + size_t size = buffer_.Size(); + // In theory we can get the following sequence: + // label1: + // instr + // label2: # label1 gets committed when label2 is seen + // CodePosition() call + // and we need to uncommit label1. 
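The DsFsm* routines above drive a small three-state machine over label and instruction events: a label PC is committed as a usable branch target only after it is followed by a delay-slot-eligible instruction and then by one more instruction or label. The following is a simplified, self-contained sketch of that commit/drop behaviour under the semantics described in the comments; the member names and the bool flag are illustrative, not the assembler's own API.

#include <cstdint>
#include <vector>

enum State { kExpectingLabel, kExpectingInstruction, kExpectingCommit };

struct LabelFsm {
  State state = kExpectingLabel;
  uint32_t target_pc = 0;
  std::vector<uint32_t> committed;

  void OnLabel(uint32_t pc) {
    if (state == kExpectingCommit) {
      committed.push_back(target_pc);  // previous label is now safe to commit
    }
    target_pc = pc;  // consecutive labels share the same PC
    state = kExpectingInstruction;
  }

  void OnInstruction(bool eligible_for_delay_slot) {
    if (state == kExpectingInstruction) {
      // Drop the pending label if the instruction cannot sit in a delay slot.
      state = eligible_for_delay_slot ? kExpectingCommit : kExpectingLabel;
    } else if (state == kExpectingCommit) {
      committed.push_back(target_pc);  // one more instruction seen: commit
      state = kExpectingLabel;
    }
  }
};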
+ if (ds_fsm_target_pcs_.size() != 0 && ds_fsm_target_pcs_.back() + sizeof(uint32_t) == size) { + ds_fsm_target_pcs_.pop_back(); + } + return size; +} + +void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) { + DsFsmInstr(0, 0, 0, 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) { + DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction, + Register in1_out, + Register in2, + Register in3) { + DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrFff(uint32_t instruction, + FRegister out, + FRegister in1, + FRegister in2) { + DsFsmInstr(instruction, 0, 0, (1u << out), (1u << in1) | (1u << in2), 0, 0); +} + +void MipsAssembler::DsFsmInstrFfff(uint32_t instruction, + FRegister in1_out, + FRegister in2, + FRegister in3) { + DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0); +} + +void MipsAssembler::DsFsmInstrFffr(uint32_t instruction, + FRegister in1_out, + FRegister in2, + Register in3) { + DsFsmInstr(instruction, 0, (1u << in3), (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0); +} + +void MipsAssembler::DsFsmInstrRf(uint32_t instruction, Register out, FRegister in) { + DsFsmInstr(instruction, (1u << out), 0, 0, (1u << in), 0, 0); +} + +void MipsAssembler::DsFsmInstrFr(uint32_t instruction, FRegister out, Register in) { + DsFsmInstr(instruction, 0, (1u << in), (1u << out), 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2) { + DsFsmInstr(instruction, 0, (1u << in2), 0, (1u << in1), 0, 0); +} + +void MipsAssembler::DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2) { + DsFsmInstr(instruction, 0, 0, 0, (1u << in1) | (1u << in2), (1 << cc_out), 0); +} + +void MipsAssembler::DsFsmInstrRrrc(uint32_t instruction, + Register in1_out, + Register in2, + int cc_in) { + DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0, 0, (1 << cc_in)); +} + +void MipsAssembler::DsFsmInstrFffc(uint32_t instruction, + FRegister in1_out, + FRegister in2, + int cc_in) { + DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, (1 << cc_in)); +} + void MipsAssembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + // Commit the last branch target label (if any) and disable instruction reordering. + DsFsmCommitLabel(); + SetReorder(false); + EmitLiterals(); + ReserveJumpTableSpace(); PromoteBranches(); } void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) { size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs(); EmitBranches(); + EmitJumpTables(); Assembler::FinalizeInstructions(region); PatchCFI(number_of_delayed_adjust_pcs); } @@ -101,6 +301,12 @@ void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) { void MipsAssembler::EmitBranches() { CHECK(!overwriting_); + CHECK(!reordering_); + // Now that everything has its final position in the buffer (the branches have + // been promoted), adjust the target label PCs. + for (size_t cnt = ds_fsm_target_pcs_.size(), i = 0; i < cnt; i++) { + ds_fsm_target_pcs_[i] = GetAdjustedPosition(ds_fsm_target_pcs_[i]); + } // Switch from appending instructions at the end of the buffer to overwriting // existing instructions (branch placeholders) in the buffer. 
overwriting_ = true; @@ -122,7 +328,12 @@ void MipsAssembler::Emit(uint32_t value) { } } -void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct) { +uint32_t MipsAssembler::EmitR(int opcode, + Register rs, + Register rt, + Register rd, + int shamt, + int funct) { CHECK_NE(rs, kNoRegister); CHECK_NE(rt, kNoRegister); CHECK_NE(rd, kNoRegister); @@ -133,9 +344,10 @@ void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt << kShamtShift | funct; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { +uint32_t MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { CHECK_NE(rs, kNoRegister); CHECK_NE(rt, kNoRegister); uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | @@ -143,25 +355,32 @@ void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { static_cast<uint32_t>(rt) << kRtShift | imm; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) { +uint32_t MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) { CHECK_NE(rs, kNoRegister); CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | imm21; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI26(int opcode, uint32_t imm26) { +uint32_t MipsAssembler::EmitI26(int opcode, uint32_t imm26) { CHECK(IsUint<26>(imm26)) << imm26; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); + return encoding; } -void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, - int funct) { +uint32_t MipsAssembler::EmitFR(int opcode, + int fmt, + FRegister ft, + FRegister fs, + FRegister fd, + int funct) { CHECK_NE(ft, kNoFRegister); CHECK_NE(fs, kNoFRegister); CHECK_NE(fd, kNoFRegister); @@ -172,52 +391,54 @@ void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FReg static_cast<uint32_t>(fd) << kFdShift | funct; Emit(encoding); + return encoding; } -void MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) { +uint32_t MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) { CHECK_NE(ft, kNoFRegister); uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | fmt << kFmtShift | static_cast<uint32_t>(ft) << kFtShift | imm; Emit(encoding); + return encoding; } void MipsAssembler::Addu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x21); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt); } void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x9, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Subu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x23); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x23), rd, rs, rt); } void MipsAssembler::MultR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18), ZERO, rs, rt); } void MipsAssembler::MultuR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19), ZERO, rs, rt); } void MipsAssembler::DivR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a); + 
DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a), ZERO, rs, rt); } void MipsAssembler::DivuR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b), ZERO, rs, rt); } void MipsAssembler::MulR2(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0x1c, rs, rt, rd, 0, 2); + DsFsmInstrRrr(EmitR(0x1c, rs, rt, rd, 0, 2), rd, rs, rt); } void MipsAssembler::DivR2(Register rd, Register rs, Register rt) { @@ -246,293 +467,333 @@ void MipsAssembler::ModuR2(Register rd, Register rs, Register rt) { void MipsAssembler::MulR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x18), rd, rs, rt); } void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x18), rd, rs, rt); } void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x19); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x19), rd, rs, rt); } void MipsAssembler::DivR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x1a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1a), rd, rs, rt); } void MipsAssembler::ModR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x1a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1a), rd, rs, rt); } void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1b), rd, rs, rt); } void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1b), rd, rs, rt); } void MipsAssembler::And(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x24); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x24), rd, rs, rt); } void MipsAssembler::Andi(Register rt, Register rs, uint16_t imm16) { - EmitI(0xc, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xc, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Or(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x25); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x25), rd, rs, rt); } void MipsAssembler::Ori(Register rt, Register rs, uint16_t imm16) { - EmitI(0xd, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xd, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Xor(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x26); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x26), rd, rs, rt); } void MipsAssembler::Xori(Register rt, Register rs, uint16_t imm16) { - EmitI(0xe, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xe, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Nor(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x27); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x27), rd, rs, rt); } void MipsAssembler::Movz(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, rd, 0, 0x0A); + DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0A), rd, rs, rt); } void MipsAssembler::Movn(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, rd, 0, 0x0B); + DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0B), rd, rs, rt); } void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 0, 0x35); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x35), rd, rs, rt); } void MipsAssembler::Selnez(Register rd, 
Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 0, 0x37); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x37), rd, rs, rt); } void MipsAssembler::ClzR6(Register rd, Register rs) { CHECK(IsR6()); - EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10); + DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10), rd, rs, rs); } void MipsAssembler::ClzR2(Register rd, Register rs) { CHECK(!IsR6()); - EmitR(0x1C, rs, rd, rd, 0, 0x20); + DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x20), rd, rs, rs); } void MipsAssembler::CloR6(Register rd, Register rs) { CHECK(IsR6()); - EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11); + DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11), rd, rs, rs); } void MipsAssembler::CloR2(Register rd, Register rs) { CHECK(!IsR6()); - EmitR(0x1C, rs, rd, rd, 0, 0x21); + DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x21), rd, rs, rs); } void MipsAssembler::Seb(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20), rd, rt, rt); } void MipsAssembler::Seh(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20), rd, rt, rt); } void MipsAssembler::Wsbh(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20), rd, rt, rt); } void MipsAssembler::Bitswap(Register rd, Register rt) { CHECK(IsR6()); - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20), rd, rt, rt); } void MipsAssembler::Sll(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00), rd, rt, rt); } void MipsAssembler::Srl(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02), rd, rt, rt); } void MipsAssembler::Rotr(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02), rd, rt, rt); } void MipsAssembler::Sra(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03), rd, rt, rt); } void MipsAssembler::Sllv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x04); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x04), rd, rs, rt); } void MipsAssembler::Srlv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x06); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x06), rd, rs, rt); } void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 1, 0x06); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 1, 0x06), rd, rs, rt); } void MipsAssembler::Srav(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x07); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x07), rd, rs, rt); } void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) { CHECK(IsUint<5>(pos)) << pos; CHECK(0 < size && size <= 32) << size; CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << 
size; - EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00); + DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00), rd, rt, rt); } void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) { CHECK(IsUint<5>(pos)) << pos; CHECK(0 < size && size <= 32) << size; CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size; - EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04); + DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt); +} + +// TODO: This instruction is available in both R6 and MSA and it should be used when available. +void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) { + CHECK(IsR6()); + CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; + int sa = saPlusOne - 1; + DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt); +} + +void MipsAssembler::ShiftAndAdd(Register dst, + Register src_idx, + Register src_base, + int shamt, + Register tmp) { + CHECK(0 <= shamt && shamt <= 4) << shamt; + CHECK_NE(src_base, tmp); + if (shamt == TIMES_1) { + // Catch the special case where the shift amount is zero (0). + Addu(dst, src_base, src_idx); + } else if (IsR6()) { + Lsa(dst, src_idx, src_base, shamt); + } else { + Sll(tmp, src_idx, shamt); + Addu(dst, src_base, tmp); + } } void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) { - EmitI(0x20, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x20, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) { - EmitI(0x21, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) { - EmitI(0x23, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x22, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x22, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x26, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x26, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x24, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x24, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x25, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x25, rs, rt, imm16), rt, rs, rs); +} + +void MipsAssembler::Lwpc(Register rs, uint32_t imm19) { + CHECK(IsR6()); + CHECK(IsUint<19>(imm19)) << imm19; + DsFsmInstrNop(EmitI21(0x3B, rs, (0x01 << 19) | imm19)); } void MipsAssembler::Lui(Register rt, uint16_t imm16) { - EmitI(0xf, static_cast<Register>(0), rt, imm16); + DsFsmInstrRrr(EmitI(0xf, static_cast<Register>(0), rt, imm16), rt, ZERO, ZERO); +} + +void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) { + CHECK(IsR6()); + DsFsmInstrRrr(EmitI(0xf, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Sync(uint32_t stype) { - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), - stype & 0x1f, 0xf); + DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, stype & 0x1f, 0xf)); } void MipsAssembler::Mfhi(Register rd) { CHECK(!IsR6()); - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x10); + DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x10), rd, ZERO, ZERO); } void MipsAssembler::Mflo(Register rd) { CHECK(!IsR6()); - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x12); + 
DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x12), rd, ZERO, ZERO); } void MipsAssembler::Sb(Register rt, Register rs, uint16_t imm16) { - EmitI(0x28, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x28, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) { - EmitI(0x29, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) { - EmitI(0x2b, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x2a, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x2a, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x2e, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x2e, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::LlR2(Register rt, Register base, int16_t imm16) { CHECK(!IsR6()); - EmitI(0x30, base, rt, imm16); + DsFsmInstrRrr(EmitI(0x30, base, rt, imm16), rt, base, base); } void MipsAssembler::ScR2(Register rt, Register base, int16_t imm16) { CHECK(!IsR6()); - EmitI(0x38, base, rt, imm16); + DsFsmInstrRrr(EmitI(0x38, base, rt, imm16), rt, rt, base); } void MipsAssembler::LlR6(Register rt, Register base, int16_t imm9) { CHECK(IsR6()); CHECK(IsInt<9>(imm9)); - EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36); + DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36), rt, base, base); } void MipsAssembler::ScR6(Register rt, Register base, int16_t imm9) { CHECK(IsR6()); CHECK(IsInt<9>(imm9)); - EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26); + DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26), rt, rt, base); } void MipsAssembler::Slt(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x2a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2a), rd, rs, rt); } void MipsAssembler::Sltu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x2b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2b), rd, rs, rt); } void MipsAssembler::Slti(Register rt, Register rs, uint16_t imm16) { - EmitI(0xa, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xa, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Sltiu(Register rt, Register rs, uint16_t imm16) { - EmitI(0xb, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xb, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::B(uint16_t imm16) { - EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16)); +} + +void MipsAssembler::Bal(uint16_t imm16) { + DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16)); } void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x4, rs, rt, imm16)); } void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x5, rs, rt, imm16)); } void MipsAssembler::Beqz(Register rt, uint16_t imm16) { @@ -544,19 +805,19 @@ void MipsAssembler::Bnez(Register rt, uint16_t imm16) { } void MipsAssembler::Bltz(Register rt, uint16_t imm16) { - EmitI(0x1, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bgez(Register rt, uint16_t imm16) { - EmitI(0x1, rt, static_cast<Register>(0x1), imm16); + DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0x1), imm16)); } void MipsAssembler::Blez(Register rt, uint16_t 
imm16) { - EmitI(0x6, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x6, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bgtz(Register rt, uint16_t imm16) { - EmitI(0x7, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x7, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bc1f(uint16_t imm16) { @@ -566,7 +827,7 @@ void MipsAssembler::Bc1f(uint16_t imm16) { void MipsAssembler::Bc1f(int cc, uint16_t imm16) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16); + DsFsmInstrNop(EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16)); } void MipsAssembler::Bc1t(uint16_t imm16) { @@ -576,19 +837,45 @@ void MipsAssembler::Bc1t(uint16_t imm16) { void MipsAssembler::Bc1t(int cc, uint16_t imm16) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16); + DsFsmInstrNop(EmitI(0x11, + static_cast<Register>(0x8), + static_cast<Register>((cc << 2) | 1), + imm16)); } void MipsAssembler::J(uint32_t addr26) { - EmitI26(0x2, addr26); + DsFsmInstrNop(EmitI26(0x2, addr26)); } void MipsAssembler::Jal(uint32_t addr26) { - EmitI26(0x3, addr26); + DsFsmInstrNop(EmitI26(0x3, addr26)); } void MipsAssembler::Jalr(Register rd, Register rs) { - EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09); + uint32_t last_instruction = delay_slot_.instruction_; + bool exchange = (last_instruction != 0 && + (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 && + ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0); + if (exchange) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). + DsFsmDropLabel(); + } + DsFsmInstrNop(EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09)); + if (exchange) { + // Exchange the last two instructions in the assembler buffer. 
+ size_t size = buffer_.Size(); + CHECK_GE(size, 2 * sizeof(uint32_t)); + size_t pos1 = size - 2 * sizeof(uint32_t); + size_t pos2 = size - sizeof(uint32_t); + uint32_t instr1 = buffer_.Load<uint32_t>(pos1); + uint32_t instr2 = buffer_.Load<uint32_t>(pos2); + CHECK_EQ(instr1, last_instruction); + buffer_.Store<uint32_t>(pos1, instr2); + buffer_.Store<uint32_t>(pos2, instr1); + } else if (reordering_) { + Nop(); + } } void MipsAssembler::Jalr(Register rs) { @@ -600,33 +887,38 @@ void MipsAssembler::Jr(Register rs) { } void MipsAssembler::Nal() { - EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0); + DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0)); } void MipsAssembler::Auipc(Register rs, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16); + DsFsmInstrNop(EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16)); } void MipsAssembler::Addiupc(Register rs, uint32_t imm19) { CHECK(IsR6()); CHECK(IsUint<19>(imm19)) << imm19; - EmitI21(0x3B, rs, imm19); + DsFsmInstrNop(EmitI21(0x3B, rs, imm19)); } void MipsAssembler::Bc(uint32_t imm26) { CHECK(IsR6()); - EmitI26(0x32, imm26); + DsFsmInstrNop(EmitI26(0x32, imm26)); +} + +void MipsAssembler::Balc(uint32_t imm26) { + CHECK(IsR6()); + DsFsmInstrNop(EmitI26(0x3A, imm26)); } void MipsAssembler::Jic(Register rt, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x36, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x36, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Jialc(Register rt, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x3E, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x3E, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) { @@ -634,19 +926,19 @@ void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x17, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x17, rs, rt, imm16)); } void MipsAssembler::Bltzc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x17, rt, rt, imm16); + DsFsmInstrNop(EmitI(0x17, rt, rt, imm16)); } void MipsAssembler::Bgtzc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x17, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x17, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) { @@ -654,19 +946,19 @@ void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x16, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x16, rs, rt, imm16)); } void MipsAssembler::Bgezc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x16, rt, rt, imm16); + DsFsmInstrNop(EmitI(0x16, rt, rt, imm16)); } void MipsAssembler::Blezc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x16, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x16, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) { @@ -674,7 +966,7 @@ void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x7, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x7, rs, rt, imm16)); } void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) { @@ -682,7 +974,7 @@ void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); 
CHECK_NE(rs, rt); - EmitI(0x6, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x6, rs, rt, imm16)); } void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) { @@ -690,7 +982,7 @@ void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); + DsFsmInstrNop(EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16)); } void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) { @@ -698,29 +990,29 @@ void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); + DsFsmInstrNop(EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16)); } void MipsAssembler::Beqzc(Register rs, uint32_t imm21) { CHECK(IsR6()); CHECK_NE(rs, ZERO); - EmitI21(0x36, rs, imm21); + DsFsmInstrNop(EmitI21(0x36, rs, imm21)); } void MipsAssembler::Bnezc(Register rs, uint32_t imm21) { CHECK(IsR6()); CHECK_NE(rs, ZERO); - EmitI21(0x3E, rs, imm21); + DsFsmInstrNop(EmitI21(0x3E, rs, imm21)); } void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) { CHECK(IsR6()); - EmitFI(0x11, 0x9, ft, imm16); + DsFsmInstrNop(EmitFI(0x11, 0x9, ft, imm16)); } void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) { CHECK(IsR6()); - EmitFI(0x11, 0xD, ft, imm16); + DsFsmInstrNop(EmitFI(0x11, 0xD, ft, imm16)); } void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) { @@ -842,67 +1134,67 @@ void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, } void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x0); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x0), fd, fs, ft); } void MipsAssembler::SubS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x1); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1), fd, fs, ft); } void MipsAssembler::MulS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x2); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x2), fd, fs, ft); } void MipsAssembler::DivS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x3); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x3), fd, fs, ft); } void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x0); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x0), fd, fs, ft); } void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x1); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1), fd, fs, ft); } void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x2); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x2), fd, fs, ft); } void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x3); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x3), fd, fs, ft); } void MipsAssembler::SqrtS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs); } void MipsAssembler::SqrtD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs); } void MipsAssembler::AbsS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, 
static_cast<FRegister>(0), fs, fd, 0x5); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs); } void MipsAssembler::AbsD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs); } void MipsAssembler::MovS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs); } void MipsAssembler::MovD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs); } void MipsAssembler::NegS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs); } void MipsAssembler::NegD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs); } void MipsAssembler::CunS(FRegister fs, FRegister ft) { @@ -912,7 +1204,7 @@ void MipsAssembler::CunS(FRegister fs, FRegister ft) { void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft); } void MipsAssembler::CeqS(FRegister fs, FRegister ft) { @@ -922,7 +1214,7 @@ void MipsAssembler::CeqS(FRegister fs, FRegister ft) { void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft); } void MipsAssembler::CueqS(FRegister fs, FRegister ft) { @@ -932,7 +1224,7 @@ void MipsAssembler::CueqS(FRegister fs, FRegister ft) { void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft); } void MipsAssembler::ColtS(FRegister fs, FRegister ft) { @@ -942,7 +1234,7 @@ void MipsAssembler::ColtS(FRegister fs, FRegister ft) { void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft); } void MipsAssembler::CultS(FRegister fs, FRegister ft) { @@ -952,7 +1244,7 @@ void MipsAssembler::CultS(FRegister fs, FRegister ft) { void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft); } void MipsAssembler::ColeS(FRegister fs, FRegister ft) { @@ -962,7 +1254,7 @@ void MipsAssembler::ColeS(FRegister fs, FRegister ft) { void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, 
fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft); } void MipsAssembler::CuleS(FRegister fs, FRegister ft) { @@ -972,7 +1264,7 @@ void MipsAssembler::CuleS(FRegister fs, FRegister ft) { void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft); } void MipsAssembler::CunD(FRegister fs, FRegister ft) { @@ -982,7 +1274,7 @@ void MipsAssembler::CunD(FRegister fs, FRegister ft) { void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft); } void MipsAssembler::CeqD(FRegister fs, FRegister ft) { @@ -992,7 +1284,7 @@ void MipsAssembler::CeqD(FRegister fs, FRegister ft) { void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft); } void MipsAssembler::CueqD(FRegister fs, FRegister ft) { @@ -1002,7 +1294,7 @@ void MipsAssembler::CueqD(FRegister fs, FRegister ft) { void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft); } void MipsAssembler::ColtD(FRegister fs, FRegister ft) { @@ -1012,7 +1304,7 @@ void MipsAssembler::ColtD(FRegister fs, FRegister ft) { void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft); } void MipsAssembler::CultD(FRegister fs, FRegister ft) { @@ -1022,7 +1314,7 @@ void MipsAssembler::CultD(FRegister fs, FRegister ft) { void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft); } void MipsAssembler::ColeD(FRegister fs, FRegister ft) { @@ -1032,7 +1324,7 @@ void MipsAssembler::ColeD(FRegister fs, FRegister ft) { void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft); } void MipsAssembler::CuleD(FRegister fs, FRegister ft) { @@ -1042,247 +1334,301 @@ void MipsAssembler::CuleD(FRegister fs, FRegister ft) { void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft); } void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x01); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x01), fd, fs, ft); } void 
MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x02); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x02), fd, fs, ft); } void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x03); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x03), fd, fs, ft); } void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x04); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x04), fd, fs, ft); } void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x05); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x05), fd, fs, ft); } void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x06); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x06), fd, fs, ft); } void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x07); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x07), fd, fs, ft); } void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x11); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x11), fd, fs, ft); } void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x12); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x12), fd, fs, ft); } void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x13); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x13), fd, fs, ft); } void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x01); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x01), fd, fs, ft); } void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x02); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x02), fd, fs, ft); } void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x03); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x03), fd, fs, ft); } void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x04); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x04), fd, fs, ft); } void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x05); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x05), fd, fs, ft); } void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x06); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x06), fd, fs, ft); } void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x07); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x07), fd, fs, ft); } void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x11); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x11), fd, fs, ft); } void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x12); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x12), fd, fs, ft); } void MipsAssembler::CmpNeD(FRegister fd, 
FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x13); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x13), fd, fs, ft); } void MipsAssembler::Movf(Register rd, Register rs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01); + DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01), rd, rs, cc); } void MipsAssembler::Movt(Register rd, Register rs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); + DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01), rd, rs, cc); } void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc); } void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc); } void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11), + fd, + fs, + cc); } void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11), + fd, + fs, + cc); +} + +void MipsAssembler::MovzS(FRegister fd, FRegister fs, Register rt) { + CHECK(!IsR6()); + DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt); +} + +void MipsAssembler::MovzD(FRegister fd, FRegister fs, Register rt) { + CHECK(!IsR6()); + DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt); +} + +void MipsAssembler::MovnS(FRegister fd, FRegister fs, Register rt) { + CHECK(!IsR6()); + DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt); +} + +void MipsAssembler::MovnD(FRegister fd, FRegister fs, Register rt) { + CHECK(!IsR6()); + DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt); } void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x10); + DsFsmInstrFfff(EmitFR(0x11, 0x10, ft, fs, fd, 0x10), fd, fs, ft); } void MipsAssembler::SelD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x10); + DsFsmInstrFfff(EmitFR(0x11, 0x11, ft, fs, fd, 0x10), fd, fs, ft); +} + +void MipsAssembler::SeleqzS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x14), fd, fs, ft); +} + +void MipsAssembler::SeleqzD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x14), fd, fs, ft); +} + +void MipsAssembler::SelnezS(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x17), fd, fs, ft); +} + +void MipsAssembler::SelnezD(FRegister fd, FRegister fs, FRegister ft) { + CHECK(IsR6()); + 
DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x17), fd, fs, ft); } void MipsAssembler::ClassS(FRegister fd, FRegister fs) { CHECK(IsR6()); - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs); } void MipsAssembler::ClassD(FRegister fd, FRegister fs) { CHECK(IsR6()); - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs); } void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x1c); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1c), fd, fs, ft); } void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x1c); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1c), fd, fs, ft); } void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x1e); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1e), fd, fs, ft); } void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1e), fd, fs, ft); } void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs); } void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs); } void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs); } void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs); } void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::FloorWS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf); + 
DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs); } void MipsAssembler::FloorWD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs); } void MipsAssembler::Mfc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrRf(EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + rt, + fs); } void MipsAssembler::Mtc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrFr(EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + fs, + rt); } void MipsAssembler::Mfhc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrRf(EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + rt, + fs); } void MipsAssembler::Mthc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrFr(EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + fs, + rt); } void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) { @@ -1304,28 +1650,33 @@ void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) { } void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x31, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFr(EmitI(0x31, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x35, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFr(EmitI(0x35, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x39, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFR(EmitI(0x39, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x3d, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFR(EmitI(0x3d, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Break() { - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), - static_cast<Register>(0), 0, 0xD); + DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, 0, 0xD)); } void MipsAssembler::Nop() { - EmitR(0x0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), 0, 0x0); + DsFsmInstrNop(EmitR(0x0, ZERO, ZERO, ZERO, 0, 0x0)); +} + +void MipsAssembler::NopIfNoReordering() { + if (!reordering_) { + Nop(); + } } void MipsAssembler::Move(Register rd, Register rs) { @@ -1351,9 +1702,11 @@ void MipsAssembler::Pop(Register rd) { } void MipsAssembler::PopAndReturn(Register rd, Register rt) { + bool reordering = SetReorder(false); Lw(rd, SP, 0); Jr(rt); - DecreaseFrameSize(kMipsWordSize); + DecreaseFrameSize(kMipsWordSize); // Single instruction in delay slot. + SetReorder(reordering); } void MipsAssembler::LoadConst32(Register rd, int32_t value) { @@ -1381,55 +1734,6 @@ void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) } } -void MipsAssembler::StoreConst32ToOffset(int32_t value, - Register base, - int32_t offset, - Register temp) { - if (!IsInt<16>(offset)) { - CHECK_NE(temp, AT); // Must not use AT as temp, as not to overwrite the loaded value. 
- LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - if (value == 0) { - temp = ZERO; - } else { - LoadConst32(temp, value); - } - Sw(temp, base, offset); -} - -void MipsAssembler::StoreConst64ToOffset(int64_t value, - Register base, - int32_t offset, - Register temp) { - // IsInt<16> must be passed a signed value. - if (!IsInt<16>(offset) || !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))) { - CHECK_NE(temp, AT); // Must not use AT as temp, as not to overwrite the loaded value. - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - uint32_t low = Low32Bits(value); - uint32_t high = High32Bits(value); - if (low == 0) { - Sw(ZERO, base, offset); - } else { - LoadConst32(temp, low); - Sw(temp, base, offset); - } - if (high == 0) { - Sw(ZERO, base, offset + kMipsWordSize); - } else { - if (high != low) { - LoadConst32(temp, high); - } - Sw(temp, base, offset + kMipsWordSize); - } -} - void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) { if (value == 0) { temp = ZERO; @@ -1457,11 +1761,35 @@ void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) { } void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) { + CHECK_NE(rs, temp); // Must not overwrite the register `rs` while loading `value`. if (IsInt<16>(value)) { Addiu(rt, rs, value); + } else if (IsR6()) { + int16_t high = High16Bits(value); + int16_t low = Low16Bits(value); + high += (low < 0) ? 1 : 0; // Account for sign extension in addiu. + if (low != 0) { + Aui(temp, rs, high); + Addiu(rt, temp, low); + } else { + Aui(rt, rs, high); + } } else { - LoadConst32(temp, value); - Addu(rt, rs, temp); + // Do not load the whole 32-bit `value` if it can be represented as + // a sum of two 16-bit signed values. This can save an instruction. + constexpr int32_t kMinValueForSimpleAdjustment = std::numeric_limits<int16_t>::min() * 2; + constexpr int32_t kMaxValueForSimpleAdjustment = std::numeric_limits<int16_t>::max() * 2; + if (0 <= value && value <= kMaxValueForSimpleAdjustment) { + Addiu(temp, rs, kMaxValueForSimpleAdjustment / 2); + Addiu(rt, temp, value - kMaxValueForSimpleAdjustment / 2); + } else if (kMinValueForSimpleAdjustment <= value && value < 0) { + Addiu(temp, rs, kMinValueForSimpleAdjustment / 2); + Addiu(rt, temp, value - kMinValueForSimpleAdjustment / 2); + } else { + // Now that all shorter options have been exhausted, load the full 32-bit value. + LoadConst32(temp, value); + Addu(rt, rs, temp); + } } } @@ -1471,30 +1799,68 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; } -void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) { +void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) { OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 - if (is_call) { - InitShortOrLong(offset_size, kR6Call, kR6LongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); - } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? 
kR6CondBranch : kR6LongCondBranch; - } else { - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); - } + switch (initial_type) { + case kLabel: + CHECK(!IsResolved()); + type_ = kR6Label; + break; + case kLiteral: + CHECK(!IsResolved()); + type_ = kR6Literal; + break; + case kCall: + InitShortOrLong(offset_size, kR6Call, kR6LongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + break; + default: + InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } } else { // R2 - if (is_call) { - InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); - } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + switch (initial_type) { + case kLabel: + CHECK(!IsResolved()); + type_ = kLabel; + break; + case kLiteral: + CHECK(!IsResolved()); + type_ = kLiteral; + break; + case kCall: + InitShortOrLong(offset_size, kCall, kLongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } } old_type_ = type_; @@ -1526,14 +1892,15 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re } } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(0), rhs_reg_(0), - condition_(kUncond) { - InitializeType(false, is_r6); + condition_(kUncond), + delayed_instruction_(kUnfilledDelaySlot) { + InitializeType((is_call ? kCall : kCondBranch), is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -1547,7 +1914,8 @@ MipsAssembler::Branch::Branch(bool is_r6, target_(target), lhs_reg_(lhs_reg), rhs_reg_(rhs_reg), - condition_(condition) { + condition_(condition), + delayed_instruction_(kUnfilledDelaySlot) { CHECK_NE(condition, kUncond); switch (condition) { case kCondLT: @@ -1590,19 +1958,28 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. 
condition_ = kUncond; } - InitializeType(false, is_r6); + InitializeType(kCondBranch, is_r6); } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg) +MipsAssembler::Branch::Branch(bool is_r6, + uint32_t location, + Register dest_reg, + Register base_reg, + Type label_or_literal_type) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), - rhs_reg_(0), - condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true, is_r6); + target_(kUnresolved), + lhs_reg_(dest_reg), + rhs_reg_(base_reg), + condition_(kUncond), + delayed_instruction_(kUnfilledDelaySlot) { + CHECK_NE(dest_reg, ZERO); + if (is_r6) { + CHECK_EQ(base_reg, ZERO); + } else { + CHECK_NE(base_reg, ZERO); + } + InitializeType(label_or_literal_type, is_r6); } MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( @@ -1674,12 +2051,38 @@ uint32_t MipsAssembler::Branch::GetOldLocation() const { return old_location_; } +uint32_t MipsAssembler::Branch::GetPrecedingInstructionLength(Type type) const { + // Short branches with delay slots always consist of two instructions, the branch + // and the delay slot, irrespective of whether the delay slot is filled with a + // useful instruction or not. + // Long composite branches may have a length longer by one instruction than + // specified in branch_info_[].length. This happens when an instruction is taken + // to fill the short branch delay slot, but the branch eventually becomes long + // and formally has no delay slot to fill. This instruction is placed at the + // beginning of the long composite branch and this needs to be accounted for in + // the branch length and the location of the offset encoded in the branch. + switch (type) { + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + case kR6LongCondBranch: + return (delayed_instruction_ != kUnfilledDelaySlot && + delayed_instruction_ != kUnfillableDelaySlot) ? 1 : 0; + default: + return 0; + } +} + +uint32_t MipsAssembler::Branch::GetPrecedingInstructionSize(Type type) const { + return GetPrecedingInstructionLength(type) * sizeof(uint32_t); +} + uint32_t MipsAssembler::Branch::GetLength() const { - return branch_info_[type_].length; + return GetPrecedingInstructionLength(type_) + branch_info_[type_].length; } uint32_t MipsAssembler::Branch::GetOldLength() const { - return branch_info_[old_type_].length; + return GetPrecedingInstructionLength(old_type_) + branch_info_[old_type_].length; } uint32_t MipsAssembler::Branch::GetSize() const { @@ -1704,19 +2107,35 @@ bool MipsAssembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // R2 near label. + case kLabel: + // R2 near literal. + case kLiteral: // R6 short branches. case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 near label. + case kR6Label: + // R6 near literal. + case kR6Literal: return false; // R2 long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // R2 far label. + case kFarLabel: + // R2 far literal. + case kFarLiteral: // R6 long branches. case kR6LongUncondBranch: case kR6LongCondBranch: case kR6LongCall: + // R6 far label. + case kR6FarLabel: + // R6 far literal. + case kR6FarLiteral: return true; } UNREACHABLE(); @@ -1785,6 +2204,14 @@ void MipsAssembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // R2 near label. + case kLabel: + type_ = kFarLabel; + break; + // R2 near literal. 
+ case kLiteral: + type_ = kFarLiteral; + break; // R6 short branches. case kR6UncondBranch: type_ = kR6LongUncondBranch; @@ -1795,6 +2222,14 @@ void MipsAssembler::Branch::PromoteToLong() { case kR6Call: type_ = kR6LongCall; break; + // R6 near label. + case kR6Label: + type_ = kR6FarLabel; + break; + // R6 near literal. + case kR6Literal: + type_ = kR6FarLiteral; + break; default: // Note: 'type_' is already long. break; @@ -1802,14 +2237,28 @@ void MipsAssembler::Branch::PromoteToLong() { CHECK(IsLong()); } -uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { +uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLabel: + case Branch::kFarLabel: + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetLocation(); + } +} + +uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or + // `this->GetLocation()` for everything else. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { return 0; } // Promote the short branch to long if the offset size is too small - // to hold the distance between location_ and target_. - if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + // to hold the distance between location and target_. + if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) { PromoteToLong(); uint32_t old_size = GetOldSize(); uint32_t new_size = GetSize(); @@ -1819,7 +2268,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { - int64_t distance = static_cast<int64_t>(target_) - location_; + int64_t distance = static_cast<int64_t>(target_) - location; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { PromoteToLong(); @@ -1833,15 +2282,32 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { } uint32_t MipsAssembler::Branch::GetOffsetLocation() const { - return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); + return location_ + GetPrecedingInstructionSize(type_) + + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLabel: + case Branch::kFarLabel: + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetOffsetLocation() + + Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t); + } } -uint32_t MipsAssembler::Branch::GetOffset() const { +uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or + // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` + // for everything else. CHECK(IsResolved()); uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. 
- uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset = target_ - location; // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1861,6 +2327,9 @@ void MipsAssembler::Bind(MipsLabel* label) { CHECK(!label->IsBound()); uint32_t bound_pc = buffer_.Size(); + // Make the delay slot FSM aware of the new label. + DsFsmLabel(); + // Walk the list of branches referring to and preceding this label. // Store the previously unknown target addresses in them. while (label->IsLinked()) { @@ -1888,7 +2357,7 @@ void MipsAssembler::Bind(MipsLabel* label) { label->BindTo(bound_pc); } -uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const { +uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1923,13 +2392,25 @@ uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) { return old_position + last_position_adjustment_; } +void MipsAssembler::BindPcRelBaseLabel() { + Bind(&pc_rel_base_label_); +} + +uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const { + return GetLabelLocation(&pc_rel_base_label_); +} + void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); + // Commit the last branch target label (if any). + DsFsmCommitLabel(); if (!label->IsBound()) { // Branch forward (to a following label), distance is unknown. // The first branch forward will contain 0, serving as the terminator of // the list of forward-reaching branches. Emit(label->position_); + // Nothing for the delay slot (yet). + DsFsmInstrNop(0); length--; // Now make the label object point to this branch // (this forms a linked list of branches preceding this label). @@ -1942,9 +2423,139 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { } } +bool MipsAssembler::Branch::CanHaveDelayedInstruction(const DelaySlot& delay_slot) const { + if (delay_slot.instruction_ == 0) { + // NOP or no instruction for the delay slot. + return false; + } + switch (type_) { + // R2 unconditional branches. + case kUncondBranch: + case kLongUncondBranch: + // There are no register interdependencies. + return true; + + // R2 calls. + case kCall: + case kLongCall: + // Instructions depending on or modifying RA should not be moved into delay slots + // of branches modifying RA. + return ((delay_slot.gpr_ins_mask_ | delay_slot.gpr_outs_mask_) & (1u << RA)) == 0; + + // R2 conditional branches. + case kCondBranch: + case kLongCondBranch: + switch (condition_) { + // Branches with one GPR source. + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + return (delay_slot.gpr_outs_mask_ & (1u << lhs_reg_)) == 0; + + // Branches with two GPR sources. + case kCondEQ: + case kCondNE: + return (delay_slot.gpr_outs_mask_ & ((1u << lhs_reg_) | (1u << rhs_reg_))) == 0; + + // Branches with one FPU condition code source. + case kCondF: + case kCondT: + return (delay_slot.cc_outs_mask_ & (1u << lhs_reg_)) == 0; + + default: + // We don't support synthetic R2 branches (preceded with slt[u]) at this level + // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >). + LOG(FATAL) << "Unexpected branch condition " << condition_; + UNREACHABLE(); + } + + // R6 unconditional branches. + case kR6UncondBranch: + case kR6LongUncondBranch: + // R6 calls. 
+ case kR6Call: + case kR6LongCall: + // There are no delay slots. + return false; + + // R6 conditional branches. + case kR6CondBranch: + case kR6LongCondBranch: + switch (condition_) { + // Branches with one FPU register source. + case kCondF: + case kCondT: + return (delay_slot.fpr_outs_mask_ & (1u << lhs_reg_)) == 0; + // Others have a forbidden slot instead of a delay slot. + default: + return false; + } + + // Literals. + default: + LOG(FATAL) << "Unexpected branch type " << type_; + UNREACHABLE(); + } +} + +uint32_t MipsAssembler::Branch::GetDelayedInstruction() const { + return delayed_instruction_; +} + +void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) { + CHECK_NE(instruction, kUnfilledDelaySlot); + CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot); + delayed_instruction_ = instruction; +} + +void MipsAssembler::Branch::DecrementLocations() { + // We first create a branch object, which gets its type and locations initialized, + // and then we check if the branch can actually have the preceding instruction moved + // into its delay slot. If it can, the branch locations need to be decremented. + // + // We could make the check before creating the branch object and avoid the location + // adjustment, but the check is cleaner when performed on an initialized branch + // object. + // + // If the branch is backwards (to a previously bound label), reducing the locations + // cannot cause a short branch to exceed its offset range because the offset reduces. + // And this is not at all a problem for a long branch backwards. + // + // If the branch is forward (not linked to any label yet), reducing the locations + // is harmless. The branch will be promoted to long if needed when the target is known. + CHECK_EQ(location_, old_location_); + CHECK_GE(old_location_, sizeof(uint32_t)); + old_location_ -= sizeof(uint32_t); + location_ = old_location_; +} + +void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) { + if (branch.CanHaveDelayedInstruction(delay_slot_)) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). + DsFsmDropLabel(); + // Remove the last emitted instruction. + size_t size = buffer_.Size(); + CHECK_GE(size, sizeof(uint32_t)); + size -= sizeof(uint32_t); + CHECK_EQ(buffer_.Load<uint32_t>(size), delay_slot_.instruction_); + buffer_.Resize(size); + // Attach it to the branch and adjust the branch locations. + branch.DecrementLocations(); + branch.SetDelayedInstruction(delay_slot_.instruction_); + } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) { + // If reordering is disabled, prevent absorption of the target instruction. + branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot); + } +} + void MipsAssembler::Buncond(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); + MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -1955,15 +2566,116 @@ void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register } uint32_t target = label->IsBound() ?
GetLabelLocation(label) : Branch::kUnresolved; branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs); + MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } -void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) { +void MipsAssembler::Call(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); + MoveInstructionToDelaySlot(branches_.back()); + FinalizeLabeledBranch(label); +} + +void MipsAssembler::LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label) { + // Label address loads are treated as pseudo branches since they require very similar handling. + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLabel); + FinalizeLabeledBranch(label); +} + +Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. + DCHECK_EQ(literal->GetSize(), 4u); + MipsLabel* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLiteral); FinalizeLabeledBranch(label); } +JumpTable* MipsAssembler::CreateJumpTable(std::vector<MipsLabel*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +void MipsAssembler::EmitLiterals() { + if (!literals_.empty()) { + // We don't support byte and half-word literals. + // TODO: proper alignment for 64-bit literals when they're implemented. + for (Literal& literal : literals_) { + MipsLabel* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +void MipsAssembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + MipsLabel* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with dummy data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; i++) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void MipsAssembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. 
+ overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + MipsLabel* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (MipsLabel* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + void MipsAssembler::PromoteBranches() { // Promote short branches to long as necessary. bool changed; @@ -1971,7 +2683,8 @@ void MipsAssembler::PromoteBranches() { changed = false; for (auto& branch : branches_) { CHECK(branch.IsResolved()); - uint32_t delta = branch.PromoteIfNeeded(); + uint32_t base = GetBranchLocationOrPcRelBase(&branch); + uint32_t delta = branch.PromoteIfNeeded(base); // If this branch has been promoted and needs to expand in size, // relocate all branches by the expansion size. if (delta) { @@ -1997,6 +2710,7 @@ void MipsAssembler::PromoteBranches() { uint32_t end = old_size; for (size_t i = branch_count; i > 0; ) { Branch& branch = branches_[--i]; + CHECK_GE(end, branch.GetOldEndLocation()); uint32_t size = end - branch.GetOldEndLocation(); buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); end = branch.GetOldLocation(); @@ -2009,49 +2723,101 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = // R2 short branches. { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch - { 5, 2, 0, MipsAssembler::Branch::kOffset16, 0 }, // kCall + { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 near label. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLabel + // R2 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral // R2 long branches. { 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch { 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall + // R2 far label. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLabel + // R2 far literal. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral // R6 short branches. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. - { 2, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Call + { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 near label. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Label + // R6 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal // R6 long branches. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch { 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch - { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + // R6 far label. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLabel + // R6 far literal. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; -// Note: make sure branch_info_[] and mitBranch() are kept synchronized. +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. 
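
For orientation, a minimal usage sketch of the new delay-slot filling (this sketch is not part of the patch itself). It assumes a MipsAssembler instance named asm_ and uses A0 and T8 purely as illustrative register choices; only calls that appear in this patch are used:

    MipsLabel skip;
    asm_.SetReorder(true);   // Reordering (delay-slot filling) enabled; this is the default.
    asm_.Addiu(T8, T8, 1);   // Writes only T8, which the branch does not read, so it is eligible.
    asm_.Bnez(A0, &skip);    // MoveInstructionToDelaySlot() absorbs the addiu; no nop is emitted.
    asm_.Bind(&skip);

With SetReorder(false), or when the preceding instruction cannot be moved (for example, it writes a register the branch reads), the slot stays kUnfilledDelaySlot and EmitBranch() falls back to emitting a nop, as in the cases below.
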
void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwriting_, true); overwrite_location_ = branch->GetLocation(); - uint32_t offset = branch->GetOffset(); + uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch)); BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); + uint32_t delayed_instruction = branch->GetDelayedInstruction(); switch (branch->GetType()) { // R2 short branches. case Branch::kUncondBranch: + if (delayed_instruction == Branch::kUnfillableDelaySlot) { + // The branch was created when reordering was disabled, do not absorb the target + // instruction. + delayed_instruction = 0; // NOP. + } else if (delayed_instruction == Branch::kUnfilledDelaySlot) { + // Try to absorb the target instruction into the delay slot. + delayed_instruction = 0; // NOP. + // Incrementing the signed 16-bit offset past the target instruction must not + // cause overflow into the negative subrange, check for the max offset. + if (offset != 0x7FFF) { + uint32_t target = branch->GetTarget(); + if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) { + delayed_instruction = buffer_.Load<uint32_t>(target); + offset++; + } + } + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); B(offset); - Nop(); // TODO: improve by filling the delay slot. + Emit(delayed_instruction); break; case Branch::kCondBranch: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction == Branch::kUnfilledDelaySlot) { + delayed_instruction = 0; // NOP. + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondR2(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the delay slot. + Emit(delayed_instruction); break; case Branch::kCall: - Nal(); - Nop(); // TODO: is this NOP really needed here? + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction == Branch::kUnfilledDelaySlot) { + delayed_instruction = 0; // NOP. + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiu(lhs, RA, offset); - Jalr(lhs); - Nop(); + Bal(offset); + Emit(delayed_instruction); + break; + + // R2 near label. + case Branch::kLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiu(lhs, rhs, offset); + break; + // R2 near literal. + case Branch::kLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lw(lhs, rhs, offset); break; // R2 long branches. @@ -2074,6 +2840,12 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // For now simply use the stack for RA. This should be OK since for the // vast majority of code a short PC-relative branch is sufficient. // TODO: can this be improved? + // TODO: consider generation of a shorter sequence when we know that RA + // is explicitly preserved by the method entry/exit code. + if (delayed_instruction != Branch::kUnfilledDelaySlot && + delayed_instruction != Branch::kUnfillableDelaySlot) { + Emit(delayed_instruction); + } Push(RA); Nal(); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2086,6 +2858,10 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kLongCondBranch: // The comment on case 'Branch::kLongUncondBranch' applies here as well. 
+ DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } // Note: the opposite condition branch encodes 8 as the distance, which is equal to the // number of instructions skipped: // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR). @@ -2101,39 +2877,87 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { DecreaseFrameSize(kMipsWordSize); break; case Branch::kLongCall: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } Nal(); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); Ori(AT, AT, Low16Bits(offset)); - Addu(lhs, AT, RA); - Jalr(lhs); + Addu(AT, AT, RA); + Jalr(AT); Nop(); break; + // R2 far label. + case Branch::kFarLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Ori(AT, AT, Low16Bits(offset)); + Addu(lhs, AT, rhs); + break; + // R2 far literal. + case Branch::kFarLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Addu(AT, AT, rhs); + Lw(lhs, AT, Low16Bits(offset)); + break; + // R6 short branches. case Branch::kR6UncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Bc(offset); break; case Branch::kR6CondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondR6(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the forbidden/delay slot. + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } else { + // TODO: improve by filling the forbidden slot (IFF this is + // a forbidden and not a delay slot). + Nop(); + } break; case Branch::kR6Call: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Balc(offset); + break; + + // R6 near label. + case Branch::kR6Label: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Addiupc(lhs, offset); - Jialc(lhs, 0); + break; + // R6 near literal. + case Branch::kR6Literal: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); break; // R6 long branches. case Branch::kR6LongUncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCondBranch: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2141,11 +2965,28 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCall: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // R6 far label. + case Branch::kR6FarLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Addiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Addiu(lhs, AT, Low16Bits(offset)); + break; + // R6 far literal. + case Branch::kR6FarLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -2156,8 +2997,8 @@ void MipsAssembler::B(MipsLabel* label) { Buncond(label); } -void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) { - Call(label, indirect_reg); +void MipsAssembler::Bal(MipsLabel* label) { + Call(label); } void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) { @@ -2192,12 +3033,60 @@ void MipsAssembler::Bgtz(Register rt, MipsLabel* label) { Bcond(label, kCondGTZ, rt); } +bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const { + // If the instruction modifies AT, `rs` or `rt`, it can't be exchanged with the slt[u] + // instruction because either slt[u] depends on `rs` or `rt` or the following + // conditional branch depends on AT set by slt[u]. + // Likewise, if the instruction depends on AT, it can't be exchanged with slt[u] + // because slt[u] changes AT. + return (delay_slot_.instruction_ != 0 && + (delay_slot_.gpr_outs_mask_ & ((1u << AT) | (1u << rs) | (1u << rt))) == 0 && + (delay_slot_.gpr_ins_mask_ & (1u << AT)) == 0); +} + +void MipsAssembler::ExchangeWithSlt(const DelaySlot& forwarded_slot) { + // Exchange the last two instructions in the assembler buffer. + size_t size = buffer_.Size(); + CHECK_GE(size, 2 * sizeof(uint32_t)); + size_t pos1 = size - 2 * sizeof(uint32_t); + size_t pos2 = size - sizeof(uint32_t); + uint32_t instr1 = buffer_.Load<uint32_t>(pos1); + uint32_t instr2 = buffer_.Load<uint32_t>(pos2); + CHECK_EQ(instr1, forwarded_slot.instruction_); + CHECK_EQ(instr2, delay_slot_.instruction_); + buffer_.Store<uint32_t>(pos1, instr2); + buffer_.Store<uint32_t>(pos2, instr1); + // Set the current delay slot information to that of the last instruction + // in the buffer. + delay_slot_ = forwarded_slot; +} + +void MipsAssembler::GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt) { + // If possible, exchange the slt[u] instruction with the preceding instruction, + // so it can fill the delay slot. + DelaySlot forwarded_slot = delay_slot_; + bool exchange = CanExchangeWithSlt(rs, rt); + if (exchange) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). 
+ DsFsmDropLabel(); + } + if (unsigned_slt) { + Sltu(AT, rs, rt); + } else { + Slt(AT, rs, rt); + } + if (exchange) { + ExchangeWithSlt(forwarded_slot); + } +} + void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) { if (IsR6()) { Bcond(label, kCondLT, rs, rt); } else if (!Branch::IsNop(kCondLT, rs, rt)) { // Synthesize the instruction (not available on R2). - Slt(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); Bnez(AT, label); } } @@ -2209,7 +3098,7 @@ void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) { B(label); } else { // Synthesize the instruction (not available on R2). - Slt(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); Beqz(AT, label); } } @@ -2219,7 +3108,7 @@ void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) { Bcond(label, kCondLTU, rs, rt); } else if (!Branch::IsNop(kCondLTU, rs, rt)) { // Synthesize the instruction (not available on R2). - Sltu(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); Bnez(AT, label); } } @@ -2231,7 +3120,7 @@ void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) { B(label); } else { // Synthesize the instruction (not available on R2). - Sltu(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); Beqz(AT, label); } } @@ -2262,84 +3151,113 @@ void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) { Bcond(label, kCondT, static_cast<Register>(ft), ZERO); } -void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base, - int32_t offset) { - // IsInt<16> must be passed a signed value. - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; +void MipsAssembler::AdjustBaseAndOffset(Register& base, + int32_t& offset, + bool is_doubleword, + bool is_float) { + // This method is used to adjust the base register and offset pair + // for a load/store when the offset doesn't fit into int16_t. + // It is assumed that `base + offset` is sufficiently aligned for memory + // operands that are machine word in size or smaller. For doubleword-sized + // operands it's assumed that `base` is a multiple of 8, while `offset` + // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments + // and spilled variables on the stack accessed relative to the stack + // pointer register). + // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8. + CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`. + + bool doubleword_aligned = IsAligned<kMipsDoublewordSize>(offset); + bool two_accesses = is_doubleword && (!is_float || !doubleword_aligned); + + // IsInt<16> must be passed a signed value, hence the static cast below. + if (IsInt<16>(offset) && + (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) { + // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t. + return; } - switch (type) { - case kLoadSignedByte: - Lb(reg, base, offset); - break; - case kLoadUnsignedByte: - Lbu(reg, base, offset); - break; - case kLoadSignedHalfword: - Lh(reg, base, offset); - break; - case kLoadUnsignedHalfword: - Lhu(reg, base, offset); - break; - case kLoadWord: - Lw(reg, base, offset); - break; - case kLoadDoubleword: - if (reg == base) { - // This will clobber the base when loading the lower register. 
Since we have to load the - // higher register as well, this will fail. Solution: reverse the order. - Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - Lw(reg, base, offset); - } else { - Lw(reg, base, offset); - Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; + // Remember the "(mis)alignment" of `offset`, it will be checked at the end. + uint32_t misalignment = offset & (kMipsDoublewordSize - 1); + + // Do not load the whole 32-bit `offset` if it can be represented as + // a sum of two 16-bit signed offsets. This can save an instruction or two. + // To simplify matters, only do this for a symmetric range of offsets from + // about -64KB to about +64KB, allowing further addition of 4 when accessing + // 64-bit variables with two 32-bit accesses. + constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8; // Max int16_t that's a multiple of 8. + constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment; + if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) { + Addiu(AT, base, kMinOffsetForSimpleAdjustment); + offset -= kMinOffsetForSimpleAdjustment; + } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) { + Addiu(AT, base, -kMinOffsetForSimpleAdjustment); + offset += kMinOffsetForSimpleAdjustment; + } else if (IsR6()) { + // On R6 take advantage of the aui instruction, e.g.: + // aui AT, base, offset_high + // lw reg_lo, offset_low(AT) + // lw reg_hi, (offset_low+4)(AT) + // or when offset_low+4 overflows int16_t: + // aui AT, base, offset_high + // addiu AT, AT, 8 + // lw reg_lo, (offset_low-8)(AT) + // lw reg_hi, (offset_low-4)(AT) + int16_t offset_high = High16Bits(offset); + int16_t offset_low = Low16Bits(offset); + offset_high += (offset_low < 0) ? 1 : 0; // Account for offset sign extension in load/store. + Aui(AT, base, offset_high); + if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low + kMipsWordSize))) { + // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4. + Addiu(AT, AT, kMipsDoublewordSize); + offset_low -= kMipsDoublewordSize; + } + offset = offset_low; + } else { + // Do not load the whole 32-bit `offset` if it can be represented as + // a sum of three 16-bit signed offsets. This can save an instruction. + // To simplify matters, only do this for a symmetric range of offsets from + // about -96KB to about +96KB, allowing further addition of 4 when accessing + // 64-bit variables with two 32-bit accesses. + constexpr int32_t kMinOffsetForMediumAdjustment = 2 * kMinOffsetForSimpleAdjustment; + constexpr int32_t kMaxOffsetForMediumAdjustment = 3 * kMinOffsetForSimpleAdjustment; + if (0 <= offset && offset <= kMaxOffsetForMediumAdjustment) { + Addiu(AT, base, kMinOffsetForMediumAdjustment / 2); + Addiu(AT, AT, kMinOffsetForMediumAdjustment / 2); + offset -= kMinOffsetForMediumAdjustment; + } else if (-kMaxOffsetForMediumAdjustment <= offset && offset < 0) { + Addiu(AT, base, -kMinOffsetForMediumAdjustment / 2); + Addiu(AT, AT, -kMinOffsetForMediumAdjustment / 2); + offset += kMinOffsetForMediumAdjustment; + } else { + // Now that all shorter options have been exhausted, load the full 32-bit offset. 
+ int32_t loaded_offset = RoundDown(offset, kMipsDoublewordSize); + LoadConst32(AT, loaded_offset); + Addu(AT, AT, base); + offset -= loaded_offset; + } } -} + base = AT; -void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) { - if (!IsInt<16>(offset)) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; + CHECK(IsInt<16>(offset)); + if (two_accesses) { + CHECK(IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize))); } + CHECK_EQ(misalignment, offset & (kMipsDoublewordSize - 1)); +} - Lwc1(reg, base, offset); +void MipsAssembler::LoadFromOffset(LoadOperandType type, + Register reg, + Register base, + int32_t offset) { + LoadFromOffset<>(type, reg, base, offset); +} + +void MipsAssembler::LoadSFromOffset(FRegister reg, Register base, int32_t offset) { + LoadSFromOffset<>(reg, base, offset); } void MipsAssembler::LoadDFromOffset(FRegister reg, Register base, int32_t offset) { - // IsInt<16> must be passed a signed value. - if (!IsInt<16>(offset) || - (!IsAligned<kMipsDoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - - if (offset & 0x7) { - if (Is32BitFPU()) { - Lwc1(reg, base, offset); - Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); - } else { - // 64-bit FPU. - Lwc1(reg, base, offset); - Lw(T8, base, offset + kMipsWordSize); - Mthc1(T8, reg); - } - } else { - Ldc1(reg, base, offset); - } + LoadDFromOffset<>(reg, base, offset); } void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, @@ -2360,76 +3278,25 @@ void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32 CHECK_EQ(kMipsDoublewordSize, size) << dst; LoadDFromOffset(dst.AsFRegister(), src_register, src_offset); } + } else if (dst.IsDRegister()) { + CHECK_EQ(kMipsDoublewordSize, size) << dst; + LoadDFromOffset(dst.AsOverlappingDRegisterLow(), src_register, src_offset); } } -void MipsAssembler::StoreToOffset(StoreOperandType type, Register reg, Register base, +void MipsAssembler::StoreToOffset(StoreOperandType type, + Register reg, + Register base, int32_t offset) { - // IsInt<16> must be passed a signed value. - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - - switch (type) { - case kStoreByte: - Sb(reg, base, offset); - break; - case kStoreHalfword: - Sh(reg, base, offset); - break; - case kStoreWord: - Sw(reg, base, offset); - break; - case kStoreDoubleword: - CHECK_NE(reg, base); - CHECK_NE(static_cast<Register>(reg + 1), base); - Sw(reg, base, offset); - Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreToOffset<>(type, reg, base, offset); } void MipsAssembler::StoreSToOffset(FRegister reg, Register base, int32_t offset) { - if (!IsInt<16>(offset)) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - - Swc1(reg, base, offset); + StoreSToOffset<>(reg, base, offset); } void MipsAssembler::StoreDToOffset(FRegister reg, Register base, int32_t offset) { - // IsInt<16> must be passed a signed value. 
- if (!IsInt<16>(offset) || - (!IsAligned<kMipsDoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMipsWordSize)))) { - LoadConst32(AT, offset); - Addu(AT, AT, base); - base = AT; - offset = 0; - } - - if (offset & 0x7) { - if (Is32BitFPU()) { - Swc1(reg, base, offset); - Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); - } else { - // 64-bit FPU. - Mfhc1(T8, reg); - Swc1(reg, base, offset); - Sw(T8, base, offset + kMipsWordSize); - } - } else { - Sdc1(reg, base, offset); - } + StoreDToOffset<>(reg, base, offset); } static dwarf::Reg DWARFReg(Register reg) { @@ -2438,8 +3305,9 @@ static dwarf::Reg DWARFReg(Register reg) { constexpr size_t kFramePointerSize = 4; -void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, +void MipsAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); DCHECK(!overwriting_); @@ -2453,7 +3321,7 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; - Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); + Register reg = callee_save_regs[i].AsMips().AsCoreRegister(); StoreToOffset(kStoreWord, reg, SP, stack_offset); cfi_.RelOffset(DWARFReg(reg), stack_offset); } @@ -2482,7 +3350,7 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } void MipsAssembler::RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& callee_save_regs) { + ArrayRef<const ManagedRegister> callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); DCHECK(!overwriting_); cfi_.RememberState(); @@ -2490,7 +3358,7 @@ void MipsAssembler::RemoveFrame(size_t frame_size, // Pop callee saves and return address. int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { - Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); + Register reg = callee_save_regs[i].AsMips().AsCoreRegister(); LoadFromOffset(kLoadWord, reg, SP, stack_offset); cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; @@ -2498,12 +3366,22 @@ void MipsAssembler::RemoveFrame(size_t frame_size, LoadFromOffset(kLoadWord, RA, SP, stack_offset); cfi_.Restore(DWARFReg(RA)); - // Decrease frame to required size. - DecreaseFrameSize(frame_size); - - // Then jump to the return address. - Jr(RA); - Nop(); + // Adjust the stack pointer in the delay slot if doing so doesn't break CFI. + bool exchange = IsInt<16>(static_cast<int32_t>(frame_size)); + bool reordering = SetReorder(false); + if (exchange) { + // Jump to the return address. + Jr(RA); + // Decrease frame to required size. + DecreaseFrameSize(frame_size); // Single instruction in delay slot. + } else { + // Decrease frame to required size. + DecreaseFrameSize(frame_size); + // Jump to the return address. + Jr(RA); + Nop(); // In delay slot. + } + SetReorder(reordering); // The CFI should be restored for any code that follows the exit block. 
cfi_.RestoreState(); @@ -2547,6 +3425,9 @@ void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { CHECK_EQ(kMipsDoublewordSize, size); StoreDToOffset(src.AsFRegister(), SP, dest.Int32Value()); } + } else if (src.IsDRegister()) { + CHECK_EQ(kMipsDoublewordSize, size); + StoreDToOffset(src.AsOverlappingDRegisterLow(), SP, dest.Int32Value()); } } @@ -2570,26 +3451,17 @@ void MipsAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); } -void MipsAssembler::StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, uint32_t imm, +void MipsAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, ManagedRegister mscratch) { MipsManagedRegister scratch = mscratch.AsMips(); CHECK(scratch.IsCoreRegister()) << scratch; - // Is this function even referenced anywhere else in the code? - LoadConst32(scratch.AsCoreRegister(), imm); - StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value()); -} - -void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - MipsManagedRegister scratch = mscratch.AsMips(); - CHECK(scratch.IsCoreRegister()) << scratch; Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value()); StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, thr_offs.Int32Value()); } -void MipsAssembler::StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) { +void MipsAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value()); } @@ -2606,8 +3478,7 @@ void MipsAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { return EmitLoad(mdest, SP, src.Int32Value(), size); } -void MipsAssembler::LoadFromThread32(ManagedRegister mdest, - ThreadOffset<kMipsWordSize> src, size_t size) { +void MipsAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -2623,8 +3494,8 @@ void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberO CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister()); LoadFromOffset(kLoadWord, dest.AsCoreRegister(), base.AsMips().AsCoreRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister()); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsCoreRegister()); } } @@ -2635,8 +3506,7 @@ void MipsAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offs base.AsMips().AsCoreRegister(), offs.Int32Value()); } -void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest, - ThreadOffset<kMipsWordSize> offs) { +void MipsAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) { MipsManagedRegister dest = mdest.AsMips(); CHECK(dest.IsCoreRegister()); LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value()); @@ -2690,9 +3560,9 @@ void MipsAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value()); } -void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs, - ThreadOffset<kMipsWordSize> thr_offs, - ManagedRegister mscratch) { +void MipsAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister mscratch) { MipsManagedRegister scratch = mscratch.AsMips(); CHECK(scratch.IsCoreRegister()) << 
scratch; LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), @@ -2701,9 +3571,9 @@ void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs, SP, fr_offs.Int32Value()); } -void MipsAssembler::CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { +void MipsAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { MipsManagedRegister scratch = mscratch.AsMips(); CHECK(scratch.IsCoreRegister()) << scratch; LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), @@ -2859,7 +3729,7 @@ void MipsAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister m LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), base.AsCoreRegister(), offset.Int32Value()); Jalr(scratch.AsCoreRegister()); - Nop(); + NopIfNoReordering(); // TODO: place reference map on call. } @@ -2871,12 +3741,12 @@ void MipsAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscrat LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), scratch.AsCoreRegister(), offset.Int32Value()); Jalr(scratch.AsCoreRegister()); - Nop(); + NopIfNoReordering(); // TODO: place reference map on call. } -void MipsAssembler::CallFromThread32(ThreadOffset<kMipsWordSize> offset ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED) { +void MipsAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "no mips implementation"; } @@ -2893,10 +3763,7 @@ void MipsAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) MipsManagedRegister scratch = mscratch.AsMips(); exception_blocks_.emplace_back(scratch, stack_adjust); LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), - S1, Thread::ExceptionOffset<kMipsWordSize>().Int32Value()); - // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry()); - // as the NAL instruction (occurring in long R2 branches) may become deprecated. - // For now use common for R2 and R6 instructions as this code must execute on both. + S1, Thread::ExceptionOffset<kMipsPointerSize>().Int32Value()); Bnez(scratch.AsCoreRegister(), exception_blocks_.back().Entry()); } @@ -2911,9 +3778,9 @@ void MipsAssembler::EmitExceptionPoll(MipsExceptionSlowPath* exception) { Move(A0, exception->scratch_.AsCoreRegister()); // Set up call to Thread::Current()->pDeliverException. LoadFromOffset(kLoadWord, T9, S1, - QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pDeliverException).Int32Value()); + QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pDeliverException).Int32Value()); Jr(T9); - Nop(); + NopIfNoReordering(); // Call never returns. 
Break(); diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index ecb67bd053..463daeb5d7 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -17,16 +17,20 @@ #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ +#include <deque> #include <utility> #include <vector> #include "arch/mips/instruction_set_features_mips.h" +#include "base/arena_containers.h" +#include "base/enums.h" #include "base/macros.h" #include "constants_mips.h" #include "globals.h" #include "managed_register_mips.h" #include "offsets.h" #include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" #include "utils/label.h" namespace art { @@ -79,6 +83,79 @@ class MipsLabel : public Label { DISALLOW_COPY_AND_ASSIGN(MipsLabel); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + +// Jump table: table of labels emitted after the literals. Similar to literals. +class JumpTable { + public: + explicit JumpTable(std::vector<MipsLabel*>&& labels) + : label_(), labels_(std::move(labels)) { + } + + uint32_t GetSize() const { + return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t); + } + + const std::vector<MipsLabel*>& GetData() const { + return labels_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + std::vector<MipsLabel*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. 
class MipsExceptionSlowPath { public: @@ -100,13 +177,20 @@ class MipsExceptionSlowPath { DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath); }; -class MipsAssembler FINAL : public Assembler { +class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> { public: + using JNIBase = JNIMacroAssembler<PointerSize::k32>; + explicit MipsAssembler(ArenaAllocator* arena, const MipsInstructionSetFeatures* instruction_set_features = nullptr) : Assembler(arena), overwriting_(false), overwrite_location_(0), + reordering_(true), + ds_fsm_state_(kExpectingLabel), + ds_fsm_target_pc_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), + jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), @@ -114,6 +198,10 @@ class MipsAssembler FINAL : public Assembler { cfi().DelayEmittingAdvancePCs(); } + size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } + size_t CodePosition() OVERRIDE; + DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + virtual ~MipsAssembler() { for (auto& branch : branches_) { CHECK(branch.IsResolved()); @@ -174,6 +262,8 @@ class MipsAssembler FINAL : public Assembler { void Srav(Register rd, Register rt, Register rs); void Ext(Register rd, Register rt, int pos, int size); // R2+ void Ins(Register rd, Register rt, int pos, int size); // R2+ + void Lsa(Register rd, Register rs, Register rt, int saPlusOne); // R6 + void ShiftAndAdd(Register dst, Register src_idx, Register src_base, int shamt, Register tmp = AT); void Lb(Register rt, Register rs, uint16_t imm16); void Lh(Register rt, Register rs, uint16_t imm16); @@ -182,7 +272,9 @@ class MipsAssembler FINAL : public Assembler { void Lwr(Register rt, Register rs, uint16_t imm16); void Lbu(Register rt, Register rs, uint16_t imm16); void Lhu(Register rt, Register rs, uint16_t imm16); + void Lwpc(Register rs, uint32_t imm19); // R6 void Lui(Register rt, uint16_t imm16); + void Aui(Register rt, Register rs, uint16_t imm16); // R6 void Sync(uint32_t stype); void Mfhi(Register rd); // R2 void Mflo(Register rd); // R2 @@ -203,7 +295,13 @@ class MipsAssembler FINAL : public Assembler { void Slti(Register rt, Register rs, uint16_t imm16); void Sltiu(Register rt, Register rs, uint16_t imm16); + // Branches and jumps to immediate offsets/addresses do not take care of their + // delay/forbidden slots and generally should not be used directly. This applies + // to the following R2 and R6 branch/jump instructions with imm16, imm21, addr26 + // offsets/addresses. + // Use branches/jumps to labels instead. void B(uint16_t imm16); + void Bal(uint16_t imm16); void Beq(Register rs, Register rt, uint16_t imm16); void Bne(Register rs, Register rt, uint16_t imm16); void Beqz(Register rt, uint16_t imm16); @@ -218,13 +316,18 @@ class MipsAssembler FINAL : public Assembler { void Bc1t(int cc, uint16_t imm16); // R2 void J(uint32_t addr26); void Jal(uint32_t addr26); + // Jalr() and Jr() fill their delay slots when reordering is enabled. + // When reordering is disabled, the delay slots must be filled manually. + // You may use NopIfNoReordering() to fill them when reordering is disabled. void Jalr(Register rd, Register rs); void Jalr(Register rs); void Jr(Register rs); + // Nal() does not fill its delay slot. It must be filled manually. 
void Nal(); void Auipc(Register rs, uint16_t imm16); // R6 void Addiupc(Register rs, uint32_t imm19); // R6 void Bc(uint32_t imm26); // R6 + void Balc(uint32_t imm26); // R6 void Jic(Register rt, uint16_t imm16); // R6 void Jialc(Register rt, uint16_t imm16); // R6 void Bltc(Register rs, Register rt, uint16_t imm16); // R6 @@ -313,8 +416,16 @@ class MipsAssembler FINAL : public Assembler { void MovfD(FRegister fd, FRegister fs, int cc = 0); // R2 void MovtS(FRegister fd, FRegister fs, int cc = 0); // R2 void MovtD(FRegister fd, FRegister fs, int cc = 0); // R2 + void MovzS(FRegister fd, FRegister fs, Register rt); // R2 + void MovzD(FRegister fd, FRegister fs, Register rt); // R2 + void MovnS(FRegister fd, FRegister fs, Register rt); // R2 + void MovnD(FRegister fd, FRegister fs, Register rt); // R2 void SelS(FRegister fd, FRegister fs, FRegister ft); // R6 void SelD(FRegister fd, FRegister fs, FRegister ft); // R6 + void SeleqzS(FRegister fd, FRegister fs, FRegister ft); // R6 + void SeleqzD(FRegister fd, FRegister fs, FRegister ft); // R6 + void SelnezS(FRegister fd, FRegister fs, FRegister ft); // R6 + void SelnezD(FRegister fd, FRegister fs, FRegister ft); // R6 void ClassS(FRegister fd, FRegister fs); // R6 void ClassD(FRegister fd, FRegister fs); // R6 void MinS(FRegister fd, FRegister fs, FRegister ft); // R6 @@ -348,6 +459,7 @@ class MipsAssembler FINAL : public Assembler { void Break(); void Nop(); + void NopIfNoReordering(); void Move(Register rd, Register rs); void Clear(Register rd); void Not(Register rd, Register rs); @@ -357,14 +469,13 @@ class MipsAssembler FINAL : public Assembler { void LoadConst64(Register reg_hi, Register reg_lo, int64_t value); void LoadDConst64(FRegister rd, int64_t value, Register temp); void LoadSConst32(FRegister r, int32_t value, Register temp); - void StoreConst32ToOffset(int32_t value, Register base, int32_t offset, Register temp); - void StoreConst64ToOffset(int64_t value, Register base, int32_t offset, Register temp); void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT); - // These will generate R2 branches or R6 branches as appropriate. + // These will generate R2 branches or R6 branches as appropriate and take care of + // the delay/forbidden slots. void Bind(MipsLabel* label); void B(MipsLabel* label); - void Jalr(MipsLabel* label, Register indirect_reg); + void Bal(MipsLabel* label); void Beq(Register rs, Register rt, MipsLabel* label); void Bne(Register rs, Register rt, MipsLabel* label); void Beqz(Register rt, MipsLabel* label); @@ -385,6 +496,225 @@ class MipsAssembler FINAL : public Assembler { void Bc1nez(FRegister ft, MipsLabel* label); // R6 void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size); + void AdjustBaseAndOffset(Register& base, + int32_t& offset, + bool is_doubleword, + bool is_float = false); + + private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. + struct NoImplicitNullChecker { + void operator()() const {} + }; + + public: + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConstToOffset(StoreOperandType type, + int64_t value, + Register base, + int32_t offset, + Register temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // We permit `base` and `temp` to coincide (however, we check that neither is AT), + // in which case the `base` register may be overwritten in the process. 
+ CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + Register reg; + // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp` + // to load and hold the value but we can use AT instead as AT hasn't been used yet. + // Otherwise, `temp` can be used for the value. And if `temp` is the same as the + // original `base` (that is, `base` prior to the adjustment), the original `base` + // register will be overwritten. + if (base == temp) { + temp = AT; + } + if (low == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst32(reg, low); + } + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + Sw(reg, base, offset); + null_checker(); + if (high == 0) { + reg = ZERO; + } else { + reg = temp; + if (high != low) { + LoadConst32(reg, high); + } + } + Sw(reg, base, offset + kMipsWordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFromOffset(LoadOperandType type, + Register reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + Lw(reg, base, offset); + break; + case kLoadDoubleword: + if (reg == base) { + // This will clobber the base when loading the lower register. Since we have to load the + // higher register as well, this will fail. Solution: reverse the order. + Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + null_checker(); + Lw(reg, base, offset); + } else { + Lw(reg, base, offset); + null_checker(); + Lw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kLoadDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadSFromOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + Lwc1(reg, base, offset); + null_checker(); + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadDFromOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + if (IsAligned<kMipsDoublewordSize>(offset)) { + Ldc1(reg, base, offset); + null_checker(); + } else { + if (Is32BitFPU()) { + Lwc1(reg, base, offset); + null_checker(); + Lwc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); + } else { + // 64-bit FPU. 
+ Lwc1(reg, base, offset); + null_checker(); + Lw(T8, base, offset + kMipsWordSize); + Mthc1(T8, reg); + } + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreToOffset(StoreOperandType type, + Register reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // Must not use AT as `reg`, so as not to overwrite the value being stored + // with the adjusted `base`. + CHECK_NE(reg, AT); + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + CHECK_NE(reg, base); + CHECK_NE(static_cast<Register>(reg + 1), base); + Sw(reg, base, offset); + null_checker(); + Sw(static_cast<Register>(reg + 1), base, offset + kMipsWordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreSToOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + Swc1(reg, base, offset); + null_checker(); + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreDToOffset(FRegister reg, + Register base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + if (IsAligned<kMipsDoublewordSize>(offset)) { + Sdc1(reg, base, offset); + null_checker(); + } else { + if (Is32BitFPU()) { + Swc1(reg, base, offset); + null_checker(); + Swc1(static_cast<FRegister>(reg + 1), base, offset + kMipsWordSize); + } else { + // 64-bit FPU. + Mfhc1(T8, reg); + Swc1(reg, base, offset); + null_checker(); + Sw(T8, base, offset + kMipsWordSize); + } + } + } + void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset); void LoadSFromOffset(FRegister reg, Register base, int32_t offset); void LoadDFromOffset(FRegister reg, Register base, int32_t offset); @@ -400,6 +730,38 @@ class MipsAssembler FINAL : public Assembler { void Pop(Register rd); void PopAndReturn(Register rd, Register rt); + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(Register dst, Register src) { + // dst = -src. + Subu(dst, ZERO, src); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(Register reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(Register reg) { + // reg = -reg. + Subu(reg, ZERO, reg); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<MipsLabel*>(label)); } @@ -407,6 +769,60 @@ class MipsAssembler FINAL : public Assembler { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS"; } + // Don't warn about a different virtual Bind/Jump in the base class. + using JNIBase::Bind; + using JNIBase::Jump; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS32"; + UNREACHABLE(); + } + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS32"; + UNREACHABLE(); + } + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED, + JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED, + ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS32"; + UNREACHABLE(); + } + + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS32"; + UNREACHABLE(); + } + + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Load label address using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). To be used with data labels in the literal / + // jump table area only and not with regular code labels. + void LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label); + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). + void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); + + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(std::vector<MipsLabel*>&& labels); + // // Overridden common assembler high-level functionality. // @@ -414,11 +830,11 @@ class MipsAssembler FINAL : public Assembler { // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, + ArrayRef<const ManagedRegister> callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; // Emit code that will remove an activation from the stack. 
- void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; @@ -431,15 +847,11 @@ class MipsAssembler FINAL : public Assembler { void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest, - uint32_t imm, + void StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, @@ -449,9 +861,7 @@ class MipsAssembler FINAL : public Assembler { // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread32(ManagedRegister mdest, - ThreadOffset<kMipsWordSize> src, - size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -462,19 +872,19 @@ class MipsAssembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<kMipsWordSize> offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE; // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread32(FrameOffset fr_offs, - ThreadOffset<kMipsWordSize> thr_offs, - ManagedRegister mscratch) OVERRIDE; - - void CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs, - FrameOffset fr_offs, + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, ManagedRegister mscratch) OVERRIDE; + void CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; @@ -550,7 +960,7 @@ class MipsAssembler FINAL : public Assembler { // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread32(ThreadOffset<kMipsWordSize> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. @@ -564,12 +974,25 @@ class MipsAssembler FINAL : public Assembler { // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS, // must be used instead of MipsLabel::GetPosition()). - uint32_t GetLabelLocation(MipsLabel* label) const; + uint32_t GetLabelLocation(const MipsLabel* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). 
uint32_t GetAdjustedPosition(uint32_t old_position); + // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by + // reading the PC value into a general-purpose register with the NAL instruction and then loading + // literals through this base register. The code generator calls this method (at most once per + // method being compiled) to bind a label to the location for which the PC value is acquired. + // The assembler then computes literal offsets relative to this label. + void BindPcRelBaseLabel(); + + // Returns the location of the label bound with BindPcRelBaseLabel(). + uint32_t GetPcRelBaseLabelLocation() const; + + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accomodate for larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -591,7 +1014,51 @@ class MipsAssembler FINAL : public Assembler { }; friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + // Enables or disables instruction reordering (IOW, automatic filling of delay slots) + // similarly to ".set reorder" / ".set noreorder" in traditional MIPS assembly. + // Returns the last state, which may be useful for temporary enabling/disabling of + // reordering. + bool SetReorder(bool enable); + private: + // Description of the last instruction in terms of input and output registers. + // Used to make the decision of moving the instruction into a delay slot. + struct DelaySlot { + DelaySlot(); + // Encoded instruction that may be used to fill the delay slot or 0 + // (0 conveniently represents NOP). + uint32_t instruction_; + // Mask of output GPRs for the instruction. + uint32_t gpr_outs_mask_; + // Mask of input GPRs for the instruction. + uint32_t gpr_ins_mask_; + // Mask of output FPRs for the instruction. + uint32_t fpr_outs_mask_; + // Mask of input FPRs for the instruction. + uint32_t fpr_ins_mask_; + // Mask of output FPU condition code flags for the instruction. + uint32_t cc_outs_mask_; + // Mask of input FPU condition code flags for the instruction. + uint32_t cc_ins_mask_; + // Branches never operate on the LO and HI registers, hence there's + // no mask for LO and HI. + }; + + // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated + // upon every new instruction and label generated. The FSM detects instructions + // suitable for delay slots and immediately preceded with labels. These are target + // instructions for branches. If an unconditional R2 branch does not get its delay + // slot filled with the immediately preceding instruction, it may instead get the + // slot filled with the target instruction (the branch will need its offset + // incremented past the target instruction). We call this "absorption". The FSM + // records PCs of the target instructions suitable for this optimization. + enum DsFsmState { + kExpectingLabel, + kExpectingInstruction, + kExpectingCommit + }; + friend std::ostream& operator<<(std::ostream& os, const DsFsmState& rhs); + class Branch { public: enum Type { @@ -599,18 +1066,34 @@ class MipsAssembler FINAL : public Assembler { kUncondBranch, kCondBranch, kCall, + // R2 near label. + kLabel, + // R2 near literal. + kLiteral, // R2 long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // R2 far label. + kFarLabel, + // R2 far literal. + kFarLiteral, // R6 short branches. kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 near label. 
+ kR6Label, + // R6 near literal. + kR6Literal, // R6 long branches. kR6LongUncondBranch, kR6LongCondBranch, kR6LongCall, + // R6 far label. + kR6FarLabel, + // R6 far literal. + kR6FarLiteral, }; // Bit sizes of offsets defined as enums to minimize chance of typos. enum OffsetBits { @@ -625,6 +1108,17 @@ class MipsAssembler FINAL : public Assembler { static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ static constexpr int32_t kMaxBranchLength = 32; static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + // The following two instruction encodings can never legally occur in branch delay + // slots and are used as markers. + // + // kUnfilledDelaySlot means that the branch may use either the preceding or the target + // instruction to fill its delay slot (the latter is only possible with unconditional + // R2 branches and is termed here as "absorption"). + static constexpr uint32_t kUnfilledDelaySlot = 0x10000000; // beq zero, zero, 0. + // kUnfillableDelaySlot means that the branch cannot use an instruction (other than NOP) + // to fill its delay slot. This is only used for unconditional R2 branches to prevent + // absorption of the target instruction when reordering is disabled. + static constexpr uint32_t kUnfillableDelaySlot = 0x13FF0000; // beq ra, ra, 0. struct BranchInfo { // Branch length as a number of 4-byte-long instructions. @@ -645,17 +1139,21 @@ class MipsAssembler FINAL : public Assembler { }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(bool is_r6, uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call); // Conditional branch. Branch(bool is_r6, uint32_t location, uint32_t target, BranchCondition condition, Register lhs_reg, - Register rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg); + Register rhs_reg); + // Label address (in literal area) or literal. + Branch(bool is_r6, + uint32_t location, + Register dest_reg, + Register base_reg, + Type label_or_literal_type); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -673,6 +1171,8 @@ class MipsAssembler FINAL : public Assembler { uint32_t GetTarget() const; uint32_t GetLocation() const; uint32_t GetOldLocation() const; + uint32_t GetPrecedingInstructionLength(Type type) const; + uint32_t GetPrecedingInstructionSize(Type type) const; uint32_t GetLength() const; uint32_t GetOldLength() const; uint32_t GetSize() const; @@ -682,6 +1182,12 @@ class MipsAssembler FINAL : public Assembler { bool IsLong() const; bool IsResolved() const; + // Various helpers for branch delay slot management. + bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const; + void SetDelayedInstruction(uint32_t instruction); + uint32_t GetDelayedInstruction() const; + void DecrementLocations(); + // Returns the bit size of the signed offset that the branch instruction can handle. OffsetBits GetOffsetSize() const; @@ -731,52 +1237,93 @@ class MipsAssembler FINAL : public Assembler { // that is allowed for short branches. This is for debugging/testing purposes. // max_short_distance = 0 forces all short branches to become long. // Use the implicit default argument when not debugging/testing. 
- uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + uint32_t PromoteIfNeeded(uint32_t location, + uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); // Returns the location of the instruction(s) containing the offset. uint32_t GetOffsetLocation() const; // Calculates and returns the offset ready for encoding in the branch instruction(s). - uint32_t GetOffset() const; + uint32_t GetOffset(uint32_t location) const; private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call, bool is_r6); + void InitializeType(Type initial_type, bool is_r6); // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); - uint32_t old_location_; // Offset into assembler buffer in bytes. - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + uint32_t lhs_reg_; // Left-hand side register in conditional branches or + // FPU condition code. Destination register in literals. + uint32_t rhs_reg_; // Right-hand side register in conditional branches. + // Base register in literals (ZERO on R6). + BranchCondition condition_; // Condition for conditional branches. - uint32_t lhs_reg_; // Left-hand side register in conditional branches or - // indirect call register. - uint32_t rhs_reg_; // Right-hand side register in conditional branches. - BranchCondition condition_; // Condition for conditional branches. + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. - Type type_; // Current type of the branch. - Type old_type_; // Initial type of the branch. + uint32_t delayed_instruction_; // Encoded instruction for the delay slot or + // kUnfilledDelaySlot if none but fillable or + // kUnfillableDelaySlot if none and unfillable + // (the latter is only used for unconditional R2 + // branches). 
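  // Editorial note (not part of the patch): the two delay-slot marker values defined above
  // decode as claimed. The MIPS I-type layout is opcode(6) | rs(5) | rt(5) | imm(16), and BEQ
  // has opcode 4, so:
  //   "beq zero, zero, 0" = (4 << 26)                            = 0x10000000 (kUnfilledDelaySlot)
  //   "beq ra, ra, 0"     = (4 << 26) | (31 << 21) | (31 << 16)  = 0x13FF0000 (kUnfillableDelaySlot)
  // Since a branch may never legally sit in another branch's delay slot, neither word can occur
  // in a real delay slot, which is what makes them safe sentinels.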
}; friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); - void EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct); - void EmitI(int opcode, Register rs, Register rt, uint16_t imm); - void EmitI21(int opcode, Register rs, uint32_t imm21); - void EmitI26(int opcode, uint32_t imm26); - void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct); - void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); + uint32_t EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct); + uint32_t EmitI(int opcode, Register rs, Register rt, uint16_t imm); + uint32_t EmitI21(int opcode, Register rs, uint32_t imm21); + uint32_t EmitI26(int opcode, uint32_t imm26); + uint32_t EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct); + uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16); void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21); void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); - void Call(MipsLabel* label, Register indirect_reg); + void Call(MipsLabel* label); void FinalizeLabeledBranch(MipsLabel* label); + // Various helpers for branch delay slot management. + void DsFsmInstr(uint32_t instruction, + uint32_t gpr_outs_mask, + uint32_t gpr_ins_mask, + uint32_t fpr_outs_mask, + uint32_t fpr_ins_mask, + uint32_t cc_outs_mask, + uint32_t cc_ins_mask); + void DsFsmInstrNop(uint32_t instruction); + void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2); + void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3); + void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2); + void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3); + void DsFsmInstrFffr(uint32_t instruction, FRegister in1_out, FRegister in2, Register in3); + void DsFsmInstrRf(uint32_t instruction, Register out, FRegister in); + void DsFsmInstrFr(uint32_t instruction, FRegister out, Register in); + void DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2); + void DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2); + void DsFsmInstrRrrc(uint32_t instruction, Register in1_out, Register in2, int cc_in); + void DsFsmInstrFffc(uint32_t instruction, FRegister in1_out, FRegister in2, int cc_in); + void DsFsmLabel(); + void DsFsmCommitLabel(); + void DsFsmDropLabel(); + void MoveInstructionToDelaySlot(Branch& branch); + bool CanExchangeWithSlt(Register rs, Register rt) const; + void ExchangeWithSlt(const DelaySlot& forwarded_slot); + void GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt); + Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; + uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; + void EmitLiterals(); + void ReserveJumpTableSpace(); + void EmitJumpTables(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -811,7 +1358,30 @@ class MipsAssembler FINAL : public Assembler { // The current overwrite location. 
uint32_t overwrite_location_; - // Data for AdjustedPosition(), see the description there. + // Whether instruction reordering (IOW, automatic filling of delay slots) is enabled. + bool reordering_; + // Information about the last instruction that may be used to fill a branch delay slot. + DelaySlot delay_slot_; + // Delay slot FSM state. + DsFsmState ds_fsm_state_; + // PC of the current labeled target instruction. + uint32_t ds_fsm_target_pc_; + // PCs of labeled target instructions. + std::vector<uint32_t> ds_fsm_target_pcs_; + + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + + // Jump table list. + ArenaDeque<JumpTable> jump_tables_; + + // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC + // we get PC using the NAL instruction. This label marks the position within the assembler buffer + // that PC (from NAL) points to. + MipsLabel pc_rel_base_label_; + + // Data for GetAdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; uint32_t last_branch_id_; diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc new file mode 100644 index 0000000000..30667efa38 --- /dev/null +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -0,0 +1,919 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "assembler_mips.h" + +#include <map> + +#include "base/stl_util.h" +#include "utils/assembler_test.h" + +#define __ GetAssembler()-> + +namespace art { + +struct MIPSCpuRegisterCompare { + bool operator()(const mips::Register& a, const mips::Register& b) const { + return a < b; + } +}; + +class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, + mips::Register, + mips::FRegister, + uint32_t> { + public: + typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base; + + AssemblerMIPS32r6Test() : + instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) { + } + + protected: + // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... + std::string GetArchitectureString() OVERRIDE { + return "mips"; + } + + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details. + return "gcc"; + } + + std::string GetAssemblerParameters() OVERRIDE { + // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6 + // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers. 
+ // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the + // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily). + return " -march=mips32r6 -modd-spreg -Wa,--no-warn" + " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); + } + + std::string GetDisassembleParameters() OVERRIDE { + return " -D -bbinary -mmips:isa32r6"; + } + + mips::MipsAssembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE { + return new (arena) mips::MipsAssembler(arena, instruction_set_features_.get()); + } + + void SetUpHelpers() OVERRIDE { + if (registers_.size() == 0) { + registers_.push_back(new mips::Register(mips::ZERO)); + registers_.push_back(new mips::Register(mips::AT)); + registers_.push_back(new mips::Register(mips::V0)); + registers_.push_back(new mips::Register(mips::V1)); + registers_.push_back(new mips::Register(mips::A0)); + registers_.push_back(new mips::Register(mips::A1)); + registers_.push_back(new mips::Register(mips::A2)); + registers_.push_back(new mips::Register(mips::A3)); + registers_.push_back(new mips::Register(mips::T0)); + registers_.push_back(new mips::Register(mips::T1)); + registers_.push_back(new mips::Register(mips::T2)); + registers_.push_back(new mips::Register(mips::T3)); + registers_.push_back(new mips::Register(mips::T4)); + registers_.push_back(new mips::Register(mips::T5)); + registers_.push_back(new mips::Register(mips::T6)); + registers_.push_back(new mips::Register(mips::T7)); + registers_.push_back(new mips::Register(mips::S0)); + registers_.push_back(new mips::Register(mips::S1)); + registers_.push_back(new mips::Register(mips::S2)); + registers_.push_back(new mips::Register(mips::S3)); + registers_.push_back(new mips::Register(mips::S4)); + registers_.push_back(new mips::Register(mips::S5)); + registers_.push_back(new mips::Register(mips::S6)); + registers_.push_back(new mips::Register(mips::S7)); + registers_.push_back(new mips::Register(mips::T8)); + registers_.push_back(new mips::Register(mips::T9)); + registers_.push_back(new mips::Register(mips::K0)); + registers_.push_back(new mips::Register(mips::K1)); + registers_.push_back(new mips::Register(mips::GP)); + registers_.push_back(new mips::Register(mips::SP)); + registers_.push_back(new mips::Register(mips::FP)); + registers_.push_back(new mips::Register(mips::RA)); + + secondary_register_names_.emplace(mips::Register(mips::ZERO), "zero"); + secondary_register_names_.emplace(mips::Register(mips::AT), "at"); + secondary_register_names_.emplace(mips::Register(mips::V0), "v0"); + secondary_register_names_.emplace(mips::Register(mips::V1), "v1"); + secondary_register_names_.emplace(mips::Register(mips::A0), "a0"); + secondary_register_names_.emplace(mips::Register(mips::A1), "a1"); + secondary_register_names_.emplace(mips::Register(mips::A2), "a2"); + secondary_register_names_.emplace(mips::Register(mips::A3), "a3"); + secondary_register_names_.emplace(mips::Register(mips::T0), "t0"); + secondary_register_names_.emplace(mips::Register(mips::T1), "t1"); + 
secondary_register_names_.emplace(mips::Register(mips::T2), "t2"); + secondary_register_names_.emplace(mips::Register(mips::T3), "t3"); + secondary_register_names_.emplace(mips::Register(mips::T4), "t4"); + secondary_register_names_.emplace(mips::Register(mips::T5), "t5"); + secondary_register_names_.emplace(mips::Register(mips::T6), "t6"); + secondary_register_names_.emplace(mips::Register(mips::T7), "t7"); + secondary_register_names_.emplace(mips::Register(mips::S0), "s0"); + secondary_register_names_.emplace(mips::Register(mips::S1), "s1"); + secondary_register_names_.emplace(mips::Register(mips::S2), "s2"); + secondary_register_names_.emplace(mips::Register(mips::S3), "s3"); + secondary_register_names_.emplace(mips::Register(mips::S4), "s4"); + secondary_register_names_.emplace(mips::Register(mips::S5), "s5"); + secondary_register_names_.emplace(mips::Register(mips::S6), "s6"); + secondary_register_names_.emplace(mips::Register(mips::S7), "s7"); + secondary_register_names_.emplace(mips::Register(mips::T8), "t8"); + secondary_register_names_.emplace(mips::Register(mips::T9), "t9"); + secondary_register_names_.emplace(mips::Register(mips::K0), "k0"); + secondary_register_names_.emplace(mips::Register(mips::K1), "k1"); + secondary_register_names_.emplace(mips::Register(mips::GP), "gp"); + secondary_register_names_.emplace(mips::Register(mips::SP), "sp"); + secondary_register_names_.emplace(mips::Register(mips::FP), "fp"); + secondary_register_names_.emplace(mips::Register(mips::RA), "ra"); + + fp_registers_.push_back(new mips::FRegister(mips::F0)); + fp_registers_.push_back(new mips::FRegister(mips::F1)); + fp_registers_.push_back(new mips::FRegister(mips::F2)); + fp_registers_.push_back(new mips::FRegister(mips::F3)); + fp_registers_.push_back(new mips::FRegister(mips::F4)); + fp_registers_.push_back(new mips::FRegister(mips::F5)); + fp_registers_.push_back(new mips::FRegister(mips::F6)); + fp_registers_.push_back(new mips::FRegister(mips::F7)); + fp_registers_.push_back(new mips::FRegister(mips::F8)); + fp_registers_.push_back(new mips::FRegister(mips::F9)); + fp_registers_.push_back(new mips::FRegister(mips::F10)); + fp_registers_.push_back(new mips::FRegister(mips::F11)); + fp_registers_.push_back(new mips::FRegister(mips::F12)); + fp_registers_.push_back(new mips::FRegister(mips::F13)); + fp_registers_.push_back(new mips::FRegister(mips::F14)); + fp_registers_.push_back(new mips::FRegister(mips::F15)); + fp_registers_.push_back(new mips::FRegister(mips::F16)); + fp_registers_.push_back(new mips::FRegister(mips::F17)); + fp_registers_.push_back(new mips::FRegister(mips::F18)); + fp_registers_.push_back(new mips::FRegister(mips::F19)); + fp_registers_.push_back(new mips::FRegister(mips::F20)); + fp_registers_.push_back(new mips::FRegister(mips::F21)); + fp_registers_.push_back(new mips::FRegister(mips::F22)); + fp_registers_.push_back(new mips::FRegister(mips::F23)); + fp_registers_.push_back(new mips::FRegister(mips::F24)); + fp_registers_.push_back(new mips::FRegister(mips::F25)); + fp_registers_.push_back(new mips::FRegister(mips::F26)); + fp_registers_.push_back(new mips::FRegister(mips::F27)); + fp_registers_.push_back(new mips::FRegister(mips::F28)); + fp_registers_.push_back(new mips::FRegister(mips::F29)); + fp_registers_.push_back(new mips::FRegister(mips::F30)); + fp_registers_.push_back(new mips::FRegister(mips::F31)); + } + } + + void TearDown() OVERRIDE { + AssemblerTest::TearDown(); + STLDeleteElements(®isters_); + STLDeleteElements(&fp_registers_); + } + + 
std::vector<mips::Register*> GetRegisters() OVERRIDE { + return registers_; + } + + std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE { + return fp_registers_; + } + + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + return imm_value; + } + + std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE { + CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); + return secondary_register_names_[reg]; + } + + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register, + mips::Register, + mips::MipsLabel*), + const std::string& instr_name) { + mips::MipsLabel label; + (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + private: + std::vector<mips::Register*> registers_; + std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_; + + std::vector<mips::FRegister*> fp_registers_; + std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_; +}; + + +TEST_F(AssemblerMIPS32r6Test, Toolchain) { + EXPECT_TRUE(CheckTools()); +} + +TEST_F(AssemblerMIPS32r6Test, MulR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::MulR6, "mul ${reg1}, ${reg2}, ${reg3}"), "MulR6"); +} + +TEST_F(AssemblerMIPS32r6Test, MuhR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::MuhR6, "muh ${reg1}, ${reg2}, ${reg3}"), "MuhR6"); +} + +TEST_F(AssemblerMIPS32r6Test, MuhuR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::MuhuR6, "muhu ${reg1}, ${reg2}, ${reg3}"), "MuhuR6"); +} + +TEST_F(AssemblerMIPS32r6Test, DivR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::DivR6, "div ${reg1}, ${reg2}, ${reg3}"), "DivR6"); +} + +TEST_F(AssemblerMIPS32r6Test, ModR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::ModR6, "mod ${reg1}, ${reg2}, ${reg3}"), "ModR6"); +} + +TEST_F(AssemblerMIPS32r6Test, DivuR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::DivuR6, "divu ${reg1}, ${reg2}, ${reg3}"), "DivuR6"); +} + +TEST_F(AssemblerMIPS32r6Test, ModuR6) { + DriverStr(RepeatRRR(&mips::MipsAssembler::ModuR6, "modu ${reg1}, ${reg2}, ${reg3}"), "ModuR6"); +} + +////////// +// MISC // +////////// + +TEST_F(AssemblerMIPS32r6Test, Aui) { + DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui"); +} + +TEST_F(AssemblerMIPS32r6Test, Auipc) { + DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerMIPS32r6Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. 
+ // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc"); +} + +TEST_F(AssemblerMIPS32r6Test, Addiupc) { + // The comment from the Lwpc() test applies to this Addiupc() test as well. + const char* code = ".set imm, {imm}\naddiupc ${reg}, (imm - ((imm & 0x40000) << 1)) << 2"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Addiupc, 19, code), "Addiupc"); +} + +TEST_F(AssemblerMIPS32r6Test, Bitswap) { + DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); +} + +TEST_F(AssemblerMIPS32r6Test, Lsa) { + DriverStr(RepeatRRRIb(&mips::MipsAssembler::Lsa, + 2, + "lsa ${reg1}, ${reg2}, ${reg3}, {imm}", + 1), + "lsa"); +} + +TEST_F(AssemblerMIPS32r6Test, Seleqz) { + DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), + "seleqz"); +} + +TEST_F(AssemblerMIPS32r6Test, Selnez) { + DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), + "selnez"); +} + +TEST_F(AssemblerMIPS32r6Test, ClzR6) { + DriverStr(RepeatRR(&mips::MipsAssembler::ClzR6, "clz ${reg1}, ${reg2}"), "clzR6"); +} + +TEST_F(AssemblerMIPS32r6Test, CloR6) { + DriverStr(RepeatRR(&mips::MipsAssembler::CloR6, "clo ${reg1}, ${reg2}"), "cloR6"); +} + +//////////////////// +// FLOATING POINT // +//////////////////// + +TEST_F(AssemblerMIPS32r6Test, SelS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SelS, "sel.s ${reg1}, ${reg2}, ${reg3}"), "sel.s"); +} + +TEST_F(AssemblerMIPS32r6Test, SelD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SeleqzS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SeleqzS, "seleqz.s ${reg1}, ${reg2}, ${reg3}"), + "seleqz.s"); +} + +TEST_F(AssemblerMIPS32r6Test, SeleqzD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SeleqzD, "seleqz.d ${reg1}, ${reg2}, ${reg3}"), + "seleqz.d"); +} + +TEST_F(AssemblerMIPS32r6Test, SelnezS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SelnezS, "selnez.s ${reg1}, ${reg2}, ${reg3}"), + "selnez.s"); +} + +TEST_F(AssemblerMIPS32r6Test, SelnezD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::SelnezD, "selnez.d ${reg1}, ${reg2}, ${reg3}"), + "selnez.d"); +} + +TEST_F(AssemblerMIPS32r6Test, ClassS) { + DriverStr(RepeatFF(&mips::MipsAssembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s"); +} + +TEST_F(AssemblerMIPS32r6Test, ClassD) { + DriverStr(RepeatFF(&mips::MipsAssembler::ClassD, "class.d ${reg1}, ${reg2}"), "class.d"); +} + +TEST_F(AssemblerMIPS32r6Test, MinS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::MinS, "min.s ${reg1}, ${reg2}, ${reg3}"), "min.s"); +} + +TEST_F(AssemblerMIPS32r6Test, MinD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::MinD, "min.d ${reg1}, ${reg2}, ${reg3}"), "min.d"); +} + +TEST_F(AssemblerMIPS32r6Test, MaxS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::MaxS, "max.s ${reg1}, ${reg2}, ${reg3}"), "max.s"); +} + +TEST_F(AssemblerMIPS32r6Test, MaxD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUnS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnS, 
"cmp.un.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpEqS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUeqS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpLtS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUltS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpLeS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUleS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpOrS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUneS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpNeS) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.s"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUnD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpEqD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUeqD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpLtD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUltD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpLeD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUleD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpOrD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpUneD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.d"); +} + +TEST_F(AssemblerMIPS32r6Test, CmpNeD) { + DriverStr(RepeatFFF(&mips::MipsAssembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.d"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadDFromOffset) { + __ LoadDFromOffset(mips::F0, mips::A0, -0x8000); + __ LoadDFromOffset(mips::F0, mips::A0, +0); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF); + __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x8008); + __ LoadDFromOffset(mips::F0, mips::A0, -0x8001); + __ LoadDFromOffset(mips::F0, mips::A0, +0x8000); + __ 
LoadDFromOffset(mips::F0, mips::A0, +0xFFF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1); + __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1); + __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0); + __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678); + + const char* expected = + "ldc1 $f0, -0x8000($a0)\n" + "ldc1 $f0, 0($a0)\n" + "ldc1 $f0, 0x7FF8($a0)\n" + "lwc1 $f0, 0x7FFB($a0)\n" + "lw $t8, 0x7FFF($a0)\n" + "mthc1 $t8, $f0\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f0, 4($at)\n" + "lw $t8, 8($at)\n" + "mthc1 $t8, $f0\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f0, 7($at)\n" + "lw $t8, 11($at)\n" + "mthc1 $t8, $f0\n" + "addiu $at, $a0, -0x7FF8\n" + "ldc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "ldc1 $f0, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "lwc1 $f0, -9($at)\n" + "lw $t8, -5($at)\n" + "mthc1 $t8, $f0\n" + "addiu $at, $a0, 0x7FF8\n" + "ldc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "ldc1 $f0, 0x7FF8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "ldc1 $f0, -0x7FE8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "ldc1 $f0, 0x8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "lwc1 $f0, 0xF($at)\n" + "lw $t8, 0x13($at)\n" + "mthc1 $t8, $f0\n" + "aui $at, $a0, 0x1\n" + "lwc1 $f0, -0xF($at)\n" + "lw $t8, -0xB($at)\n" + "mthc1 $t8, $f0\n" + "aui $at, $a0, 0x1\n" + "ldc1 $f0, -0x8($at)\n" + "aui $at, $a0, 0x1\n" + "ldc1 $f0, 0x7FE8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "ldc1 $f0, -0x7FF0($at)\n" + "aui $at, $a0, 0xFFFF\n" + "lwc1 $f0, -0x7FE9($at)\n" + "lw $t8, -0x7FE5($at)\n" + "mthc1 $t8, $f0\n" + "aui $at, $a0, 0x1\n" + "lwc1 $f0, 0x7FE9($at)\n" + "lw $t8, 0x7FED($at)\n" + "mthc1 $t8, $f0\n" + "aui $at, $a0, 0x1\n" + "ldc1 $f0, 0x7FF0($at)\n" + "aui $at, $a0, 0x1234\n" + "ldc1 $f0, 0x5678($at)\n"; + DriverStr(expected, "LoadDFromOffset"); +} + +TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) { + __ StoreDToOffset(mips::F0, mips::A0, -0x8000); + __ StoreDToOffset(mips::F0, mips::A0, +0); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF); + __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x8008); + __ StoreDToOffset(mips::F0, mips::A0, -0x8001); + __ StoreDToOffset(mips::F0, mips::A0, +0x8000); + __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8); + __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8); + __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1); + __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1); + __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0); + __ StoreDToOffset(mips::F0, mips::A0, +0x12345678); + + const char* expected = + "sdc1 $f0, -0x8000($a0)\n" + "sdc1 $f0, 0($a0)\n" + "sdc1 $f0, 0x7FF8($a0)\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, 0x7FFB($a0)\n" + "sw $t8, 0x7FFF($a0)\n" + "addiu $at, $a0, 0x7FF8\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, 4($at)\n" + "sw $t8, 
8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, 7($at)\n" + "sw $t8, 11($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "sdc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "sdc1 $f0, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, -9($at)\n" + "sw $t8, -5($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "sdc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "sdc1 $f0, 0x7FF8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "sdc1 $f0, -0x7FE8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "sdc1 $f0, 0x8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, 0xF($at)\n" + "sw $t8, 0x13($at)\n" + "aui $at, $a0, 0x1\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, -0xF($at)\n" + "sw $t8, -0xB($at)\n" + "aui $at, $a0, 0x1\n" + "sdc1 $f0, -0x8($at)\n" + "aui $at, $a0, 0x1\n" + "sdc1 $f0, 0x7FE8($at)\n" + "aui $at, $a0, 0xFFFF\n" + "sdc1 $f0, -0x7FF0($at)\n" + "aui $at, $a0, 0xFFFF\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, -0x7FE9($at)\n" + "sw $t8, -0x7FE5($at)\n" + "aui $at, $a0, 0x1\n" + "mfhc1 $t8, $f0\n" + "swc1 $f0, 0x7FE9($at)\n" + "sw $t8, 0x7FED($at)\n" + "aui $at, $a0, 0x1\n" + "sdc1 $f0, 0x7FF0($at)\n" + "aui $at, $a0, 0x1234\n" + "sdc1 $f0, 0x5678($at)\n"; + DriverStr(expected, "StoreDToOffset"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "addiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + +////////////// +// BRANCHES // +////////////// + +TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) { + mips::MipsLabel label; + __ SetReorder(true); + __ Bind(&label); + + __ CmpLtD(mips::F0, mips::F2, mips::F4); + __ Bc1nez(mips::F0, &label); // F0 dependency. 
+ + __ MulD(mips::F10, mips::F2, mips::F4); + __ Bc1eqz(mips::F10, &label); // F10 dependency. + + std::string expected = + ".set noreorder\n" + "1:\n" + + "cmp.lt.d $f0, $f2, $f4\n" + "bc1nez $f0, 1b\n" + "nop\n" + + "mul.d $f10, $f2, $f4\n" + "bc1eqz $f10, 1b\n" + "nop\n"; + DriverStr(expected, "ImpossibleReordering"); +} + +TEST_F(AssemblerMIPS32r6Test, Reordering) { + mips::MipsLabel label; + __ SetReorder(true); + __ Bind(&label); + + __ CmpLtD(mips::F0, mips::F2, mips::F4); + __ Bc1nez(mips::F2, &label); + + __ MulD(mips::F0, mips::F2, mips::F4); + __ Bc1eqz(mips::F4, &label); + + std::string expected = + ".set noreorder\n" + "1:\n" + + "bc1nez $f2, 1b\n" + "cmp.lt.d $f0, $f2, $f4\n" + + "bc1eqz $f4, 1b\n" + "mul.d $f0, $f2, $f4\n"; + DriverStr(expected, "Reordering"); +} + +TEST_F(AssemblerMIPS32r6Test, SetReorder) { + mips::MipsLabel label1, label2, label3, label4; + + __ SetReorder(true); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label1); + + __ SetReorder(false); + __ Bind(&label2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label2); + + __ SetReorder(true); + __ Bind(&label3); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label3); + + __ SetReorder(false); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label4); + + std::string expected = + ".set noreorder\n" + "1:\n" + "bc1nez $f0, 1b\n" + "addu $t0, $t1, $t2\n" + + "2:\n" + "addu $t0, $t1, $t2\n" + "bc1nez $f0, 2b\n" + "nop\n" + + "3:\n" + "bc1eqz $f0, 3b\n" + "addu $t0, $t1, $t2\n" + + "4:\n" + "addu $t0, $t1, $t2\n" + "bc1eqz $f0, 4b\n" + "nop\n"; + DriverStr(expected, "SetReorder"); +} + +TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { + mips::MipsLabel label; + __ SetReorder(true); + __ Subu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Subu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 2); // 2: account for subu and bc1nez. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. 
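// Editor's note (worked example, not part of the patch): with kAdduCount2 = (1u << 15) + 1 = 0x8001,
// the label lies kAdduCount2 + 2 = 0x8003 instructions before the auipc (the addu block plus the
// subu and the bc1nez that the long-branch expansion emits), so offset_back = -(0x8003 << 2) =
// 0xFFFDFFF4 bytes, measured from the auipc. jic sign-extends its 16-bit immediate, and the low
// half 0xFFF4 is negative, so the auipc half must be biased up by one; that is what
// "(offset_back & 0x8000) << 1" adds: 0xFFFDFFF4 + 0x10000 = 0xFFFEFFF4, giving
// "auipc $at, 0xFFFE" and "jic $at, 0xFFF4", which together yield PC - 0x2000C as intended.
// The forward offset 0x2000C has bit 15 clear and therefore needs no correction.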
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "subu $t0, $t1, $t2\n" + "bc1eqz $f0, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "subu $t0, $t1, $t2\n" + "bc1nez $f0, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); +} + +// TODO: MipsAssembler::Bc +// MipsAssembler::Jic +// MipsAssembler::Jialc +// MipsAssembler::Bltc +// MipsAssembler::Bltzc +// MipsAssembler::Bgtzc +// MipsAssembler::Bgec +// MipsAssembler::Bgezc +// MipsAssembler::Blezc +// MipsAssembler::Bltuc +// MipsAssembler::Bgeuc +// MipsAssembler::Beqc +// MipsAssembler::Bnec +// MipsAssembler::Beqzc +// MipsAssembler::Bnezc +// MipsAssembler::Bc1eqz +// MipsAssembler::Bc1nez +// MipsAssembler::Buncond +// MipsAssembler::Bcond +// MipsAssembler::Call + +// TODO: AssemblerMIPS32r6Test.B +// AssemblerMIPS32r6Test.Beq +// AssemblerMIPS32r6Test.Bne +// AssemblerMIPS32r6Test.Beqz +// AssemblerMIPS32r6Test.Bnez +// AssemblerMIPS32r6Test.Bltz +// AssemblerMIPS32r6Test.Bgez +// AssemblerMIPS32r6Test.Blez +// AssemblerMIPS32r6Test.Bgtz +// AssemblerMIPS32r6Test.Blt +// AssemblerMIPS32r6Test.Bge +// AssemblerMIPS32r6Test.Bltu +// AssemblerMIPS32r6Test.Bgeu + +#undef __ + +} // namespace art diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index cec43badf8..c24e1b16fb 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -188,7 +188,7 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register, mips::MipsLabel*), - std::string instr_name) { + const std::string& instr_name) { mips::MipsLabel label; (Base::GetAssembler()->*f)(mips::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -217,7 +217,7 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register, mips::Register, mips::MipsLabel*), - std::string instr_name) { + const std::string& instr_name) { mips::MipsLabel label; (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -561,6 +561,14 @@ TEST_F(AssemblerMIPSTest, NegD) { DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD"); } +TEST_F(AssemblerMIPSTest, FloorWS) { + DriverStr(RepeatFF(&mips::MipsAssembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s"); +} + +TEST_F(AssemblerMIPSTest, FloorWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d"); +} + TEST_F(AssemblerMIPSTest, CunS) { DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"), "CunS"); @@ -639,6 +647,42 @@ TEST_F(AssemblerMIPSTest, Movt) { DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt"); } +TEST_F(AssemblerMIPSTest, MovfS) { + DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfS, 3, "movf.s ${reg1}, ${reg2}, $fcc{imm}"), + "MovfS"); +} + +TEST_F(AssemblerMIPSTest, MovfD) { + DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfD, 3, "movf.d ${reg1}, ${reg2}, $fcc{imm}"), + "MovfD"); +} + 
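// Editor's note (not part of the patch): as I read the AssemblerTest helpers, a Repeat* call such
// as the MovfD test above drives the macro-assembler over many operand combinations and builds the
// matching expected text by substituting ${reg1}, ${reg2} and $fcc{imm} into the format string, so
// that single line stands for a family of checks roughly like:
//   __ MovfD(mips::F0, mips::F0, 0);  ->  "movf.d $f0, $f0, $fcc0"
//   __ MovfD(mips::F0, mips::F0, 1);  ->  "movf.d $f0, $f0, $fcc1"
//   __ MovfD(mips::F0, mips::F1, 0);  ->  "movf.d $f0, $f1, $fcc0"
// and so on across the register pairs and $fcc values the helper enumerates, with DriverStr()
// assembling the emitted buffer and comparing its disassembly against that text.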
+TEST_F(AssemblerMIPSTest, MovtS) { + DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtS, 3, "movt.s ${reg1}, ${reg2}, $fcc{imm}"), + "MovtS"); +} + +TEST_F(AssemblerMIPSTest, MovtD) { + DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtD, 3, "movt.d ${reg1}, ${reg2}, $fcc{imm}"), + "MovtD"); +} + +TEST_F(AssemblerMIPSTest, MovzS) { + DriverStr(RepeatFFR(&mips::MipsAssembler::MovzS, "movz.s ${reg1}, ${reg2}, ${reg3}"), "MovzS"); +} + +TEST_F(AssemblerMIPSTest, MovzD) { + DriverStr(RepeatFFR(&mips::MipsAssembler::MovzD, "movz.d ${reg1}, ${reg2}, ${reg3}"), "MovzD"); +} + +TEST_F(AssemblerMIPSTest, MovnS) { + DriverStr(RepeatFFR(&mips::MipsAssembler::MovnS, "movn.s ${reg1}, ${reg2}, ${reg3}"), "MovnS"); +} + +TEST_F(AssemblerMIPSTest, MovnD) { + DriverStr(RepeatFFR(&mips::MipsAssembler::MovnD, "movn.d ${reg1}, ${reg2}, ${reg3}"), "MovnD"); +} + TEST_F(AssemblerMIPSTest, CvtSW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW"); } @@ -723,212 +767,538 @@ TEST_F(AssemblerMIPSTest, Not) { DriverStr(RepeatRR(&mips::MipsAssembler::Not, "nor ${reg1}, ${reg2}, $zero"), "Not"); } +TEST_F(AssemblerMIPSTest, Addiu32) { + __ Addiu32(mips::A1, mips::A2, -0x8000); + __ Addiu32(mips::A1, mips::A2, +0); + __ Addiu32(mips::A1, mips::A2, +0x7FFF); + __ Addiu32(mips::A1, mips::A2, -0x10000); + __ Addiu32(mips::A1, mips::A2, -0x8001); + __ Addiu32(mips::A1, mips::A2, +0x8000); + __ Addiu32(mips::A1, mips::A2, +0xFFFE); + __ Addiu32(mips::A1, mips::A2, -0x10001); + __ Addiu32(mips::A1, mips::A2, +0xFFFF); + __ Addiu32(mips::A1, mips::A2, +0x10000); + __ Addiu32(mips::A1, mips::A2, +0x10001); + __ Addiu32(mips::A1, mips::A2, +0x12345678); + + const char* expected = + "addiu $a1, $a2, -0x8000\n" + "addiu $a1, $a2, 0\n" + "addiu $a1, $a2, 0x7FFF\n" + "addiu $at, $a2, -0x8000\n" + "addiu $a1, $at, -0x8000\n" + "addiu $at, $a2, -0x8000\n" + "addiu $a1, $at, -1\n" + "addiu $at, $a2, 0x7FFF\n" + "addiu $a1, $at, 1\n" + "addiu $at, $a2, 0x7FFF\n" + "addiu $a1, $at, 0x7FFF\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0xFFFF\n" + "addu $a1, $a2, $at\n" + "ori $at, $zero, 0xFFFF\n" + "addu $a1, $a2, $at\n" + "lui $at, 1\n" + "addu $a1, $a2, $at\n" + "lui $at, 1\n" + "ori $at, $at, 1\n" + "addu $a1, $a2, $at\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" + "addu $a1, $a2, $at\n"; + DriverStr(expected, "Addiu32"); +} + TEST_F(AssemblerMIPSTest, LoadFromOffset) { - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 256); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 1000); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x8000); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x10000); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0x12345678); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, -256); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadSignedByte, mips::A0, mips::A1, 0xABCDEF00); - - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 256); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 1000); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x8000); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x10000); - 
__ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0x12345678); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, -256); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A0, mips::A1, 0xABCDEF00); - - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 256); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 1000); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x8000); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x10000); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0x12345678); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, -256); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A0, mips::A1, 0xABCDEF00); - - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 256); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 1000); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x8000); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x10000); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0x12345678); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, -256); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A0, mips::A1, 0xABCDEF00); - - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 256); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 1000); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x8000); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x10000); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0x12345678); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, -256); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadWord, mips::A0, mips::A1, 0xABCDEF00); - - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A0, 0); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A1, 0); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A1, mips::A0, 0); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 256); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 1000); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x8000); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x10000); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0x12345678); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -256); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xFFFF8000); - __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, 0xABCDEF00); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8000); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, 
+0x7FF8); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFB); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFC); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x7FFF); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0xFFF0); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8008); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x8001); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x8000); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0xFFF0); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE8); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF8); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x0FFF1); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF1); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x0FFF8); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE8); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FF0); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, -0x17FE9); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FE9); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x17FF0); + __ LoadFromOffset(mips::kLoadSignedByte, mips::A3, mips::A1, +0x12345678); + + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8000); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FF8); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFB); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFC); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x7FFF); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0xFFF0); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8008); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x8001); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x8000); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0xFFF0); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE8); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF8); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x0FFF1); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF1); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x0FFF8); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE8); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FF0); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, -0x17FE9); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FE9); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x17FF0); + __ LoadFromOffset(mips::kLoadUnsignedByte, mips::A3, mips::A1, +0x12345678); + + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8000); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FF8); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFB); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFC); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x7FFF); + __ 
LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0xFFF0); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8008); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x8001); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x8000); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0xFFF0); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE8); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF8); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x0FFF1); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF1); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x0FFF8); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE8); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FF0); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, -0x17FE9); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FE9); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x17FF0); + __ LoadFromOffset(mips::kLoadSignedHalfword, mips::A3, mips::A1, +0x12345678); + + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8000); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FF8); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFB); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFC); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x7FFF); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0xFFF0); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8008); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x8001); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x8000); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0xFFF0); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE8); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF8); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x0FFF1); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF1); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x0FFF8); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE8); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FF0); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, -0x17FE9); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FE9); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x17FF0); + __ LoadFromOffset(mips::kLoadUnsignedHalfword, mips::A3, mips::A1, +0x12345678); + + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8000); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FF8); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFB); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFC); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x7FFF); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0xFFF0); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8008); + __ 
LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x8001); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x8000); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0xFFF0); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE8); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF8); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x0FFF1); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF1); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x0FFF8); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE8); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FF0); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, -0x17FE9); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FE9); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x17FF0); + __ LoadFromOffset(mips::kLoadWord, mips::A3, mips::A1, +0x12345678); + + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8000); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FF8); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFB); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFC); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x7FFF); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0xFFF0); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8008); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x8001); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x8000); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0xFFF0); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE8); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF8); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x0FFF1); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF1); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x0FFF8); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE8); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FF0); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, -0x17FE9); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FE9); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x17FF0); + __ LoadFromOffset(mips::kLoadDoubleword, mips::A0, mips::A2, +0x12345678); const char* expected = - "lb $a0, 0($a0)\n" - "lb $a0, 0($a1)\n" - "lb $a0, 256($a1)\n" - "lb $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "lb $a3, -0x8000($a1)\n" + "lb $a3, 0($a1)\n" + "lb $a3, 0x7FF8($a1)\n" + "lb $a3, 0x7FFB($a1)\n" + "lb $a3, 0x7FFC($a1)\n" + "lb $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "lb $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lb $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lb $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lb $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lb $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lb $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lb $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lb $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lb $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lb $a3, 8($at)\n" + "addiu $at, $a1, 
0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lb $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lb $a0, 0($at)\n" - "lui $at, 1\n" + "lb $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" + "addu $at, $at, $a1\n" + "lb $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a1\n" + "lb $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a1\n" - "lb $a0, 0($at)\n" + "lb $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "lb $a0, 0($at)\n" - "lb $a0, -256($a1)\n" - "lb $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lb $a3, 0($at)\n" + + "lbu $a3, -0x8000($a1)\n" + "lbu $a3, 0($a1)\n" + "lbu $a3, 0x7FF8($a1)\n" + "lbu $a3, 0x7FFB($a1)\n" + "lbu $a3, 0x7FFC($a1)\n" + "lbu $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "lbu $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lbu $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lbu $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lbu $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lbu $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lbu $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lbu $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lbu $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lbu $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lbu $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lbu $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lb $a0, 0($at)\n" - - "lbu $a0, 0($a0)\n" - "lbu $a0, 0($a1)\n" - "lbu $a0, 256($a1)\n" - "lbu $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "lbu $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" - "lui $at, 1\n" + "lbu $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" + "lbu $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" + "addu $at, $at, $a1\n" + "lbu $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" - "lbu $a0, -256($a1)\n" - "lbu $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lbu $a3, 0($at)\n" + + "lh $a3, -0x8000($a1)\n" + "lh $a3, 0($a1)\n" + "lh $a3, 0x7FF8($a1)\n" + "lh $a3, 0x7FFB($a1)\n" + "lh $a3, 0x7FFC($a1)\n" + "lh $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "lh $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lh $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lh $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lh $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lh $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lh $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lh $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lh $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lh $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lh $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lh $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" - - "lh $a0, 0($a0)\n" - "lh $a0, 0($a1)\n" - "lh $a0, 256($a1)\n" - 
"lh $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "lh $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lh $a0, 0($at)\n" - "lui $at, 1\n" + "lh $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a1\n" + "lh $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a1\n" - "lh $a0, 0($at)\n" + "lh $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "lh $a0, 0($at)\n" - "lh $a0, -256($a1)\n" - "lh $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lh $a3, 0($at)\n" + + "lhu $a3, -0x8000($a1)\n" + "lhu $a3, 0($a1)\n" + "lhu $a3, 0x7FF8($a1)\n" + "lhu $a3, 0x7FFB($a1)\n" + "lhu $a3, 0x7FFC($a1)\n" + "lhu $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "lhu $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lhu $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lhu $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lhu $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lhu $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lhu $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lhu $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lhu $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lhu $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lhu $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lhu $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lh $a0, 0($at)\n" - - "lhu $a0, 0($a0)\n" - "lhu $a0, 0($a1)\n" - "lhu $a0, 256($a1)\n" - "lhu $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "lhu $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" - "lui $at, 1\n" + "lhu $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a1\n" + "lhu $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" + "lhu $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" - "lhu $a0, -256($a1)\n" - "lhu $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lhu $a3, 0($at)\n" + + "lw $a3, -0x8000($a1)\n" + "lw $a3, 0($a1)\n" + "lw $a3, 0x7FF8($a1)\n" + "lw $a3, 0x7FFB($a1)\n" + "lw $a3, 0x7FFC($a1)\n" + "lw $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "lw $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lw $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "lw $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lw $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lw $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" - - "lw $a0, 0($a0)\n" - "lw $a0, 0($a1)\n" - "lw $a0, 256($a1)\n" - "lw $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "lw $a3, 0($at)\n" + "lui $at, 
0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "lw $a0, 0($at)\n" - "lui $at, 1\n" + "lw $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a1\n" - "lw $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "lw $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a1\n" - "lw $a0, 0($at)\n" - "lw $a0, -256($a1)\n" - "lw $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lw $a3, 0($at)\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "lw $a0, 0($at)\n" + "lw $a3, 0($at)\n" - "lw $a1, 4($a0)\n" - "lw $a0, 0($a0)\n" - "lw $a0, 0($a1)\n" - "lw $a1, 4($a1)\n" - "lw $a1, 0($a0)\n" - "lw $a2, 4($a0)\n" + "lw $a0, -0x8000($a2)\n" + "lw $a1, -0x7FFC($a2)\n" "lw $a0, 0($a2)\n" "lw $a1, 4($a2)\n" - "lw $a0, 256($a2)\n" - "lw $a1, 260($a2)\n" - "lw $a0, 1000($a2)\n" - "lw $a1, 1004($a2)\n" - "ori $at, $zero, 0x8000\n" + "lw $a0, 0x7FF8($a2)\n" + "lw $a1, 0x7FFC($a2)\n" + "lw $a0, 0x7FFB($a2)\n" + "lw $a1, 0x7FFF($a2)\n" + "addiu $at, $a2, 0x7FF8\n" + "lw $a0, 4($at)\n" + "lw $a1, 8($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "lw $a0, 7($at)\n" + "lw $a1, 11($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "lw $a0, -0x7FF8($at)\n" + "lw $a1, -0x7FF4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "lw $a0, -0x10($at)\n" + "lw $a1, -0xC($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "lw $a0, -9($at)\n" + "lw $a1, -5($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "lw $a0, 8($at)\n" + "lw $a1, 12($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "lw $a0, 0x7FF8($at)\n" + "lw $a1, 0x7FFC($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a0, -0x7FF8($at)\n" + "lw $a1, -0x7FF4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a0, -8($at)\n" + "lw $a1, -4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lw $a0, -1($at)\n" + "lw $a1, 3($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a0, 1($at)\n" + "lw $a1, 5($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a0, 8($at)\n" + "lw $a1, 12($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lw $a0, 0x7FF8($at)\n" + "lw $a1, 0x7FFC($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a2\n" "lw $a0, 0($at)\n" "lw $a1, 4($at)\n" - "lui $at, 1\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a2\n" - "lw $a0, 0($at)\n" - "lw $a1, 4($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "lw $a0, 7($at)\n" + "lw $a1, 11($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a2\n" + "lw $a0, 1($at)\n" + "lw $a1, 5($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a2\n" "lw $a0, 0($at)\n" "lw $a1, 4($at)\n" - "lw $a0, -256($a2)\n" - "lw $a1, -252($a2)\n" - "lw $a0, 0xFFFF8000($a2)\n" - "lw $a1, 0xFFFF8004($a2)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a2\n" "lw $a0, 0($at)\n" "lw $a1, 4($at)\n"; @@ -936,208 +1306,513 @@ TEST_F(AssemblerMIPSTest, LoadFromOffset) { } TEST_F(AssemblerMIPSTest, LoadSFromOffset) { - __ LoadSFromOffset(mips::F0, mips::A0, 0); - __ LoadSFromOffset(mips::F0, mips::A0, 4); - __ LoadSFromOffset(mips::F0, mips::A0, 256); - __ LoadSFromOffset(mips::F0, mips::A0, 0x8000); - __ LoadSFromOffset(mips::F0, mips::A0, 0x10000); - __ LoadSFromOffset(mips::F0, mips::A0, 0x12345678); - __ LoadSFromOffset(mips::F0, mips::A0, -256); - __ LoadSFromOffset(mips::F0, mips::A0, 0xFFFF8000); - __ 
LoadSFromOffset(mips::F0, mips::A0, 0xABCDEF00); + __ LoadSFromOffset(mips::F2, mips::A0, -0x8000); + __ LoadSFromOffset(mips::F2, mips::A0, +0); + __ LoadSFromOffset(mips::F2, mips::A0, +0x7FF8); + __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFB); + __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFC); + __ LoadSFromOffset(mips::F2, mips::A0, +0x7FFF); + __ LoadSFromOffset(mips::F2, mips::A0, -0xFFF0); + __ LoadSFromOffset(mips::F2, mips::A0, -0x8008); + __ LoadSFromOffset(mips::F2, mips::A0, -0x8001); + __ LoadSFromOffset(mips::F2, mips::A0, +0x8000); + __ LoadSFromOffset(mips::F2, mips::A0, +0xFFF0); + __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE8); + __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF8); + __ LoadSFromOffset(mips::F2, mips::A0, -0x0FFF1); + __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF1); + __ LoadSFromOffset(mips::F2, mips::A0, +0x0FFF8); + __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE8); + __ LoadSFromOffset(mips::F2, mips::A0, -0x17FF0); + __ LoadSFromOffset(mips::F2, mips::A0, -0x17FE9); + __ LoadSFromOffset(mips::F2, mips::A0, +0x17FE9); + __ LoadSFromOffset(mips::F2, mips::A0, +0x17FF0); + __ LoadSFromOffset(mips::F2, mips::A0, +0x12345678); const char* expected = - "lwc1 $f0, 0($a0)\n" - "lwc1 $f0, 4($a0)\n" - "lwc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x8000\n" + "lwc1 $f2, -0x8000($a0)\n" + "lwc1 $f2, 0($a0)\n" + "lwc1 $f2, 0x7FF8($a0)\n" + "lwc1 $f2, 0x7FFB($a0)\n" + "lwc1 $f2, 0x7FFC($a0)\n" + "lwc1 $f2, 0x7FFF($a0)\n" + "addiu $at, $a0, -0x7FF8\n" + "lwc1 $f2, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "lwc1 $f2, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "lwc1 $f2, -9($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f2, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f2, 0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lwc1 $f2, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lwc1 $f2, -8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lwc1 $f2, -1($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lwc1 $f2, 1($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lwc1 $f2, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lwc1 $f2, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "lui $at, 1\n" + "lwc1 $f2, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "lwc1 $f2, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "lwc1 $f0, -256($a0)\n" - "lwc1 $f0, 0xFFFF8000($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lwc1 $f2, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n"; + "lwc1 $f2, 0($at)\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" + "addu $at, $at, $a0\n" + "lwc1 $f2, 0($at)\n"; DriverStr(expected, "LoadSFromOffset"); } - TEST_F(AssemblerMIPSTest, LoadDFromOffset) { - __ LoadDFromOffset(mips::F0, mips::A0, 0); - __ LoadDFromOffset(mips::F0, mips::A0, 4); - __ LoadDFromOffset(mips::F0, mips::A0, 256); - __ LoadDFromOffset(mips::F0, mips::A0, 0x8000); - __ LoadDFromOffset(mips::F0, mips::A0, 0x10000); - __ LoadDFromOffset(mips::F0, mips::A0, 0x12345678); - __ LoadDFromOffset(mips::F0, mips::A0, -256); - __ LoadDFromOffset(mips::F0, mips::A0, 0xFFFF8000); - __ LoadDFromOffset(mips::F0, mips::A0, 0xABCDEF00); + __ LoadDFromOffset(mips::F0, 
mips::A0, -0x8000); + __ LoadDFromOffset(mips::F0, mips::A0, +0); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FF8); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFB); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFC); + __ LoadDFromOffset(mips::F0, mips::A0, +0x7FFF); + __ LoadDFromOffset(mips::F0, mips::A0, -0xFFF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x8008); + __ LoadDFromOffset(mips::F0, mips::A0, -0x8001); + __ LoadDFromOffset(mips::F0, mips::A0, +0x8000); + __ LoadDFromOffset(mips::F0, mips::A0, +0xFFF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x0FFF1); + __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF1); + __ LoadDFromOffset(mips::F0, mips::A0, +0x0FFF8); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE8); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FF0); + __ LoadDFromOffset(mips::F0, mips::A0, -0x17FE9); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FE9); + __ LoadDFromOffset(mips::F0, mips::A0, +0x17FF0); + __ LoadDFromOffset(mips::F0, mips::A0, +0x12345678); const char* expected = + "ldc1 $f0, -0x8000($a0)\n" "ldc1 $f0, 0($a0)\n" - "lwc1 $f0, 4($a0)\n" - "lwc1 $f1, 8($a0)\n" - "ldc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x8000\n" + "ldc1 $f0, 0x7FF8($a0)\n" + "lwc1 $f0, 0x7FFB($a0)\n" + "lwc1 $f1, 0x7FFF($a0)\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f0, 4($at)\n" + "lwc1 $f1, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "lwc1 $f0, 7($at)\n" + "lwc1 $f1, 11($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "ldc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "ldc1 $f0, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "lwc1 $f0, -9($at)\n" + "lwc1 $f1, -5($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "ldc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "ldc1 $f0, 0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "ldc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "ldc1 $f0, -8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "lwc1 $f0, -1($at)\n" + "lwc1 $f1, 3($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "lwc1 $f0, 1($at)\n" + "lwc1 $f1, 5($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "ldc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "ldc1 $f0, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" "ldc1 $f0, 0($at)\n" - "lui $at, 1\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "lwc1 $f0, 7($at)\n" + "lwc1 $f1, 11($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a0\n" + "lwc1 $f0, 1($at)\n" + "lwc1 $f1, 5($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a0\n" "ldc1 $f0, 0($at)\n" - "ldc1 $f0, -256($a0)\n" - "ldc1 $f0, 0xFFFF8000($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a0\n" "ldc1 $f0, 0($at)\n"; DriverStr(expected, "LoadDFromOffset"); } TEST_F(AssemblerMIPSTest, StoreToOffset) { - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A0, 0); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 256); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 1000); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x8000); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x10000); - __ 
StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0x12345678); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, -256); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xFFFF8000); - __ StoreToOffset(mips::kStoreByte, mips::A0, mips::A1, 0xABCDEF00); - - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A0, 0); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 256); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 1000); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x8000); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x10000); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0x12345678); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, -256); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xFFFF8000); - __ StoreToOffset(mips::kStoreHalfword, mips::A0, mips::A1, 0xABCDEF00); - - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A0, 0); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 256); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 1000); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x8000); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x10000); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0x12345678); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, -256); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xFFFF8000); - __ StoreToOffset(mips::kStoreWord, mips::A0, mips::A1, 0xABCDEF00); - - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 256); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 1000); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x8000); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x10000); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0x12345678); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -256); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xFFFF8000); - __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, 0xABCDEF00); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8000); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FF8); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFB); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFC); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x7FFF); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0xFFF0); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8008); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x8001); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x8000); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0xFFF0); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE8); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF8); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x0FFF1); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF1); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x0FFF8); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE8); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FF0); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, -0x17FE9); + 
__ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FE9); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x17FF0); + __ StoreToOffset(mips::kStoreByte, mips::A3, mips::A1, +0x12345678); + + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8000); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FF8); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFB); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFC); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x7FFF); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0xFFF0); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8008); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x8001); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x8000); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0xFFF0); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE8); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF8); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x0FFF1); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF1); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x0FFF8); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE8); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FF0); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, -0x17FE9); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FE9); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x17FF0); + __ StoreToOffset(mips::kStoreHalfword, mips::A3, mips::A1, +0x12345678); + + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8000); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FF8); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFB); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFC); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x7FFF); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0xFFF0); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8008); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x8001); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x8000); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0xFFF0); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE8); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF8); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x0FFF1); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF1); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x0FFF8); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE8); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FF0); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, -0x17FE9); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FE9); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x17FF0); + __ StoreToOffset(mips::kStoreWord, mips::A3, mips::A1, +0x12345678); + + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8000); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FF8); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFB); + 
__ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFC); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x7FFF); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0xFFF0); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8008); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x8001); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x8000); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0xFFF0); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE8); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF8); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x0FFF1); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF1); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x0FFF8); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE8); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FF0); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, -0x17FE9); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FE9); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x17FF0); + __ StoreToOffset(mips::kStoreDoubleword, mips::A0, mips::A2, +0x12345678); const char* expected = - "sb $a0, 0($a0)\n" - "sb $a0, 0($a1)\n" - "sb $a0, 256($a1)\n" - "sb $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "sb $a3, -0x8000($a1)\n" + "sb $a3, 0($a1)\n" + "sb $a3, 0x7FF8($a1)\n" + "sb $a3, 0x7FFB($a1)\n" + "sb $a3, 0x7FFC($a1)\n" + "sb $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "sb $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sb $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sb $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sb $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sb $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sb $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sb $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sb $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sb $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sb $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sb $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "lui $at, 1\n" + "sb $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sb $a0, 0($at)\n" + "sb $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a1\n" + "sb $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" + "addu $at, $at, $a1\n" + "sb $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "sb $a0, -256($a1)\n" - "sb $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "sb $a3, 0($at)\n" + + "sh $a3, -0x8000($a1)\n" + "sh $a3, 0($a1)\n" + "sh $a3, 0x7FF8($a1)\n" + "sh $a3, 0x7FFB($a1)\n" + "sh $a3, 0x7FFC($a1)\n" + "sh $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "sh $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sh $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sh $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sh $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sh $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sh $a3, -0x7FF8($at)\n" + 
"addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sh $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sh $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sh $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sh $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sh $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - - "sh $a0, 0($a0)\n" - "sh $a0, 0($a1)\n" - "sh $a0, 256($a1)\n" - "sh $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "sh $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sh $a0, 0($at)\n" - "lui $at, 1\n" + "sh $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a1\n" - "sh $a0, 0($at)\n" + "sh $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" + "addu $at, $at, $a1\n" + "sh $a3, 0($at)\n" "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "sh $a0, 0($at)\n" - "sh $a0, -256($a1)\n" - "sh $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "sh $a3, 0($at)\n" + + "sw $a3, -0x8000($a1)\n" + "sw $a3, 0($a1)\n" + "sw $a3, 0x7FF8($a1)\n" + "sw $a3, 0x7FFB($a1)\n" + "sw $a3, 0x7FFC($a1)\n" + "sw $a3, 0x7FFF($a1)\n" + "addiu $at, $a1, -0x7FF8\n" + "sw $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sw $a3, -0x10($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "sw $a3, -9($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sw $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "sw $a3, 0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a3, -0x7FF8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a3, -8($at)\n" + "addiu $at, $a1, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a3, -1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a3, 1($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a3, 8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a3, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sh $a0, 0($at)\n" - - "sw $a0, 0($a0)\n" - "sw $a0, 0($a1)\n" - "sw $a0, 256($a1)\n" - "sw $a0, 1000($a1)\n" - "ori $at, $zero, 0x8000\n" + "sw $a3, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a1\n" - "sw $a0, 0($at)\n" - "lui $at, 1\n" + "sw $a3, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a1\n" - "sw $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "sw $a3, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a1\n" - "sw $a0, 0($at)\n" - "sw $a0, -256($a1)\n" - "sw $a0, 0xFFFF8000($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "sw $a3, 0($at)\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a1\n" - "sw $a0, 0($at)\n" + "sw $a3, 0($at)\n" + "sw $a0, -0x8000($a2)\n" + "sw $a1, -0x7FFC($a2)\n" "sw $a0, 0($a2)\n" "sw $a1, 4($a2)\n" - "sw $a0, 256($a2)\n" - "sw $a1, 260($a2)\n" - "sw $a0, 1000($a2)\n" - "sw $a1, 1004($a2)\n" - "ori $at, $zero, 0x8000\n" + "sw $a0, 0x7FF8($a2)\n" + "sw $a1, 0x7FFC($a2)\n" + "sw $a0, 0x7FFB($a2)\n" + "sw $a1, 0x7FFF($a2)\n" + "addiu $at, $a2, 0x7FF8\n" + "sw $a0, 4($at)\n" + "sw $a1, 8($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "sw $a0, 7($at)\n" + "sw $a1, 11($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "sw $a0, -0x7FF8($at)\n" + "sw $a1, -0x7FF4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "sw $a0, 
-0x10($at)\n" + "sw $a1, -0xC($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "sw $a0, -9($at)\n" + "sw $a1, -5($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "sw $a0, 8($at)\n" + "sw $a1, 12($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "sw $a0, 0x7FF8($at)\n" + "sw $a1, 0x7FFC($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a0, -0x7FF8($at)\n" + "sw $a1, -0x7FF4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a0, -8($at)\n" + "sw $a1, -4($at)\n" + "addiu $at, $a2, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sw $a0, -1($at)\n" + "sw $a1, 3($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a0, 1($at)\n" + "sw $a1, 5($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a0, 8($at)\n" + "sw $a1, 12($at)\n" + "addiu $at, $a2, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sw $a0, 0x7FF8($at)\n" + "sw $a1, 0x7FFC($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a2\n" "sw $a0, 0($at)\n" "sw $a1, 4($at)\n" - "lui $at, 1\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a2\n" - "sw $a0, 0($at)\n" - "sw $a1, 4($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "sw $a0, 7($at)\n" + "sw $a1, 11($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a2\n" + "sw $a0, 1($at)\n" + "sw $a1, 5($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a2\n" "sw $a0, 0($at)\n" "sw $a1, 4($at)\n" - "sw $a0, -256($a2)\n" - "sw $a1, -252($a2)\n" - "sw $a0, 0xFFFF8000($a2)\n" - "sw $a1, 0xFFFF8004($a2)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a2\n" "sw $a0, 0($at)\n" "sw $a1, 4($at)\n"; @@ -1145,74 +1820,258 @@ TEST_F(AssemblerMIPSTest, StoreToOffset) { } TEST_F(AssemblerMIPSTest, StoreSToOffset) { - __ StoreSToOffset(mips::F0, mips::A0, 0); - __ StoreSToOffset(mips::F0, mips::A0, 4); - __ StoreSToOffset(mips::F0, mips::A0, 256); - __ StoreSToOffset(mips::F0, mips::A0, 0x8000); - __ StoreSToOffset(mips::F0, mips::A0, 0x10000); - __ StoreSToOffset(mips::F0, mips::A0, 0x12345678); - __ StoreSToOffset(mips::F0, mips::A0, -256); - __ StoreSToOffset(mips::F0, mips::A0, 0xFFFF8000); - __ StoreSToOffset(mips::F0, mips::A0, 0xABCDEF00); + __ StoreSToOffset(mips::F2, mips::A0, -0x8000); + __ StoreSToOffset(mips::F2, mips::A0, +0); + __ StoreSToOffset(mips::F2, mips::A0, +0x7FF8); + __ StoreSToOffset(mips::F2, mips::A0, +0x7FFB); + __ StoreSToOffset(mips::F2, mips::A0, +0x7FFC); + __ StoreSToOffset(mips::F2, mips::A0, +0x7FFF); + __ StoreSToOffset(mips::F2, mips::A0, -0xFFF0); + __ StoreSToOffset(mips::F2, mips::A0, -0x8008); + __ StoreSToOffset(mips::F2, mips::A0, -0x8001); + __ StoreSToOffset(mips::F2, mips::A0, +0x8000); + __ StoreSToOffset(mips::F2, mips::A0, +0xFFF0); + __ StoreSToOffset(mips::F2, mips::A0, -0x17FE8); + __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF8); + __ StoreSToOffset(mips::F2, mips::A0, -0x0FFF1); + __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF1); + __ StoreSToOffset(mips::F2, mips::A0, +0x0FFF8); + __ StoreSToOffset(mips::F2, mips::A0, +0x17FE8); + __ StoreSToOffset(mips::F2, mips::A0, -0x17FF0); + __ StoreSToOffset(mips::F2, mips::A0, -0x17FE9); + __ StoreSToOffset(mips::F2, mips::A0, +0x17FE9); + __ StoreSToOffset(mips::F2, mips::A0, +0x17FF0); + __ StoreSToOffset(mips::F2, mips::A0, +0x12345678); const char* expected = - "swc1 $f0, 0($a0)\n" - "swc1 $f0, 4($a0)\n" - "swc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x8000\n" + "swc1 $f2, -0x8000($a0)\n" + "swc1 $f2, 0($a0)\n" + "swc1 
$f2, 0x7FF8($a0)\n" + "swc1 $f2, 0x7FFB($a0)\n" + "swc1 $f2, 0x7FFC($a0)\n" + "swc1 $f2, 0x7FFF($a0)\n" + "addiu $at, $a0, -0x7FF8\n" + "swc1 $f2, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "swc1 $f2, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "swc1 $f2, -9($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "swc1 $f2, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "swc1 $f2, 0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "swc1 $f2, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "swc1 $f2, -8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "swc1 $f2, -1($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "swc1 $f2, 1($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "swc1 $f2, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "swc1 $f2, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" - "lui $at, 1\n" + "swc1 $f2, 0($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "swc1 $f2, 7($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" "addu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" - "swc1 $f0, -256($a0)\n" - "swc1 $f0, 0xFFFF8000($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "swc1 $f2, 1($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n"; + "swc1 $f2, 0($at)\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" + "addu $at, $at, $a0\n" + "swc1 $f2, 0($at)\n"; DriverStr(expected, "StoreSToOffset"); } TEST_F(AssemblerMIPSTest, StoreDToOffset) { - __ StoreDToOffset(mips::F0, mips::A0, 0); - __ StoreDToOffset(mips::F0, mips::A0, 4); - __ StoreDToOffset(mips::F0, mips::A0, 256); - __ StoreDToOffset(mips::F0, mips::A0, 0x8000); - __ StoreDToOffset(mips::F0, mips::A0, 0x10000); - __ StoreDToOffset(mips::F0, mips::A0, 0x12345678); - __ StoreDToOffset(mips::F0, mips::A0, -256); - __ StoreDToOffset(mips::F0, mips::A0, 0xFFFF8000); - __ StoreDToOffset(mips::F0, mips::A0, 0xABCDEF00); + __ StoreDToOffset(mips::F0, mips::A0, -0x8000); + __ StoreDToOffset(mips::F0, mips::A0, +0); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FF8); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFB); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFC); + __ StoreDToOffset(mips::F0, mips::A0, +0x7FFF); + __ StoreDToOffset(mips::F0, mips::A0, -0xFFF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x8008); + __ StoreDToOffset(mips::F0, mips::A0, -0x8001); + __ StoreDToOffset(mips::F0, mips::A0, +0x8000); + __ StoreDToOffset(mips::F0, mips::A0, +0xFFF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FE8); + __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF8); + __ StoreDToOffset(mips::F0, mips::A0, -0x0FFF1); + __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF1); + __ StoreDToOffset(mips::F0, mips::A0, +0x0FFF8); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FE8); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FF0); + __ StoreDToOffset(mips::F0, mips::A0, -0x17FE9); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FE9); + __ StoreDToOffset(mips::F0, mips::A0, +0x17FF0); + __ StoreDToOffset(mips::F0, mips::A0, +0x12345678); const char* expected = + "sdc1 $f0, -0x8000($a0)\n" "sdc1 $f0, 0($a0)\n" - "swc1 $f0, 4($a0)\n" - "swc1 $f1, 8($a0)\n" - "sdc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x8000\n" + "sdc1 $f0, 0x7FF8($a0)\n" + "swc1 $f0, 0x7FFB($a0)\n" + "swc1 $f1, 0x7FFF($a0)\n" + "addiu $at, $a0, 0x7FF8\n" + "swc1 $f0, 
4($at)\n" + "swc1 $f1, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "swc1 $f0, 7($at)\n" + "swc1 $f1, 11($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "sdc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "sdc1 $f0, -0x10($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "swc1 $f0, -9($at)\n" + "swc1 $f1, -5($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "sdc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "sdc1 $f0, 0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sdc1 $f0, -0x7FF8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "sdc1 $f0, -8($at)\n" + "addiu $at, $a0, -0x7FF8\n" + "addiu $at, $at, -0x7FF8\n" + "swc1 $f0, -1($at)\n" + "swc1 $f1, 3($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "swc1 $f0, 1($at)\n" + "swc1 $f1, 5($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sdc1 $f0, 8($at)\n" + "addiu $at, $a0, 0x7FF8\n" + "addiu $at, $at, 0x7FF8\n" + "sdc1 $f0, 0x7FF8($at)\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" "sdc1 $f0, 0($at)\n" - "lui $at, 1\n" + "lui $at, 0xFFFE\n" + "ori $at, $at, 0x8010\n" "addu $at, $at, $a0\n" - "sdc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" + "swc1 $f0, 7($at)\n" + "swc1 $f1, 11($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FE8\n" + "addu $at, $at, $a0\n" + "swc1 $f0, 1($at)\n" + "swc1 $f1, 5($at)\n" + "lui $at, 0x1\n" + "ori $at, $at, 0x7FF0\n" "addu $at, $at, $a0\n" "sdc1 $f0, 0($at)\n" - "sdc1 $f0, -256($a0)\n" - "sdc1 $f0, 0xFFFF8000($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" "addu $at, $at, $a0\n" "sdc1 $f0, 0($at)\n"; DriverStr(expected, "StoreDToOffset"); } +TEST_F(AssemblerMIPSTest, StoreConstToOffset) { + __ StoreConstToOffset(mips::kStoreByte, 0xFF, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreHalfword, 0xFFFF, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreWord, 0x12345678, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreDoubleword, 0x123456789ABCDEF0, mips::A1, +0, mips::T8); + + __ StoreConstToOffset(mips::kStoreByte, 0, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreHalfword, 0, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreWord, 0, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreDoubleword, 0, mips::A1, +0, mips::T8); + + __ StoreConstToOffset(mips::kStoreDoubleword, 0x1234567812345678, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreDoubleword, 0x1234567800000000, mips::A1, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreDoubleword, 0x0000000012345678, mips::A1, +0, mips::T8); + + __ StoreConstToOffset(mips::kStoreWord, 0, mips::T8, +0, mips::T8); + __ StoreConstToOffset(mips::kStoreWord, 0x12345678, mips::T8, +0, mips::T8); + + __ StoreConstToOffset(mips::kStoreWord, 0, mips::A1, -0xFFF0, mips::T8); + __ StoreConstToOffset(mips::kStoreWord, 0x12345678, mips::A1, +0xFFF0, mips::T8); + + __ StoreConstToOffset(mips::kStoreWord, 0, mips::T8, -0xFFF0, mips::T8); + __ StoreConstToOffset(mips::kStoreWord, 0x12345678, mips::T8, +0xFFF0, mips::T8); + + const char* expected = + "ori $t8, $zero, 0xFF\n" + "sb $t8, 0($a1)\n" + "ori $t8, $zero, 0xFFFF\n" + "sh $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0($a1)\n" + "lui $t8, 0x9ABC\n" + "ori $t8, $t8, 0xDEF0\n" + "sw $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 4($a1)\n" + + "sb $zero, 0($a1)\n" + "sh $zero, 0($a1)\n" + "sw $zero, 0($a1)\n" + "sw $zero, 0($a1)\n" + 
"sw $zero, 4($a1)\n" + + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0($a1)\n" + "sw $t8, 4($a1)\n" + "sw $zero, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 4($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0($a1)\n" + "sw $zero, 4($a1)\n" + + "sw $zero, 0($t8)\n" + "lui $at, 0x1234\n" + "ori $at, $at, 0x5678\n" + "sw $at, 0($t8)\n" + + "addiu $at, $a1, -0x7FF8\n" + "sw $zero, -0x7FF8($at)\n" + "addiu $at, $a1, 0x7FF8\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0x7FF8($at)\n" + + "addiu $at, $t8, -0x7FF8\n" + "sw $zero, -0x7FF8($at)\n" + "addiu $at, $t8, 0x7FF8\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sw $t8, 0x7FF8($at)\n"; + DriverStr(expected, "StoreConstToOffset"); +} + TEST_F(AssemblerMIPSTest, B) { mips::MipsLabel label1, label2; __ B(&label1); @@ -1245,14 +2104,17 @@ TEST_F(AssemblerMIPSTest, B) { } TEST_F(AssemblerMIPSTest, Beq) { + __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq"); } TEST_F(AssemblerMIPSTest, Bne) { + __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne"); } TEST_F(AssemblerMIPSTest, Beqz) { + __ SetReorder(false); mips::MipsLabel label; __ Beqz(mips::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -1279,6 +2141,7 @@ TEST_F(AssemblerMIPSTest, Beqz) { } TEST_F(AssemblerMIPSTest, Bnez) { + __ SetReorder(false); mips::MipsLabel label; __ Bnez(mips::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -1305,22 +2168,27 @@ TEST_F(AssemblerMIPSTest, Bnez) { } TEST_F(AssemblerMIPSTest, Bltz) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz"); } TEST_F(AssemblerMIPSTest, Bgez) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez"); } TEST_F(AssemblerMIPSTest, Blez) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez"); } TEST_F(AssemblerMIPSTest, Bgtz) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz"); } TEST_F(AssemblerMIPSTest, Blt) { + __ SetReorder(false); mips::MipsLabel label; __ Blt(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -1349,6 +2217,7 @@ TEST_F(AssemblerMIPSTest, Blt) { } TEST_F(AssemblerMIPSTest, Bge) { + __ SetReorder(false); mips::MipsLabel label; __ Bge(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -1377,6 +2246,7 @@ TEST_F(AssemblerMIPSTest, Bge) { } TEST_F(AssemblerMIPSTest, Bltu) { + __ SetReorder(false); mips::MipsLabel label; __ Bltu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -1405,6 +2275,7 @@ TEST_F(AssemblerMIPSTest, Bltu) { } TEST_F(AssemblerMIPSTest, Bgeu) { + __ SetReorder(false); mips::MipsLabel label; __ Bgeu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -1433,6 +2304,7 @@ TEST_F(AssemblerMIPSTest, Bgeu) { } TEST_F(AssemblerMIPSTest, Bc1f) { + __ SetReorder(false); mips::MipsLabel label; __ Bc1f(0, &label); constexpr size_t kAdduCount1 = 63; @@ -1459,6 +2331,7 @@ TEST_F(AssemblerMIPSTest, Bc1f) { } TEST_F(AssemblerMIPSTest, Bc1t) { + __ SetReorder(false); mips::MipsLabel label; __ Bc1t(0, &label); constexpr size_t kAdduCount1 = 63; @@ -1484,6 +2357,531 @@ TEST_F(AssemblerMIPSTest, Bc1t) { DriverStr(expected, "Bc1t"); } +/////////////////////// +// Loading Constants // +/////////////////////// + +TEST_F(AssemblerMIPSTest, LoadConst32) { + // IsUint<16>(value) + __ LoadConst32(mips::V0, 0); + __ LoadConst32(mips::V0, 65535); + // IsInt<16>(value) + __ LoadConst32(mips::V0, -1); + __ 
LoadConst32(mips::V0, -32768); + // Everything else + __ LoadConst32(mips::V0, 65536); + __ LoadConst32(mips::V0, 65537); + __ LoadConst32(mips::V0, 2147483647); + __ LoadConst32(mips::V0, -32769); + __ LoadConst32(mips::V0, -65536); + __ LoadConst32(mips::V0, -65537); + __ LoadConst32(mips::V0, -2147483647); + __ LoadConst32(mips::V0, -2147483648); + + const char* expected = + // IsUint<16>(value) + "ori $v0, $zero, 0\n" // __ LoadConst32(mips::V0, 0); + "ori $v0, $zero, 65535\n" // __ LoadConst32(mips::V0, 65535); + // IsInt<16>(value) + "addiu $v0, $zero, -1\n" // __ LoadConst32(mips::V0, -1); + "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips::V0, -32768); + // Everything else + "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65536); + "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65537); + "ori $v0, 1\n" // " + "lui $v0, 32767\n" // __ LoadConst32(mips::V0, 2147483647); + "ori $v0, 65535\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -32769); + "ori $v0, 32767\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -65536); + "lui $v0, 65534\n" // __ LoadConst32(mips::V0, -65537); + "ori $v0, 65535\n" // " + "lui $v0, 32768\n" // __ LoadConst32(mips::V0, -2147483647); + "ori $v0, 1\n" // " + "lui $v0, 32768\n"; // __ LoadConst32(mips::V0, -2147483648); + DriverStr(expected, "LoadConst32"); +} + +TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "addiu $v0, $v1, %lo(2f - 1b)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "ori $at, $at, %lo(2f - 1b)\n" + "addu $v0, $at, $v1\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); +} + +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + "1:\n" + "lw $v0, %lo(2f - 1b)($v1)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "addu $at, $at, $v1\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + 
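The LoadConst32 cases above exercise a simple size-based selection: a value that fits as a zero-extended 16-bit immediate becomes a single ori, a sign-extended 16-bit value a single addiu, and everything else a lui for the upper half plus an optional ori for a non-zero lower half. Below is a minimal sketch of that decision procedure, illustrative only; it borrows names (IsUint, IsInt, High16Bits, Low16Bits, mips::ZERO) that appear elsewhere in this diff and is not the actual TemplateLoadConst32 implementation.

// Sketch only: the constant-size selection that the LoadConst32 expectations above reflect.
// Assumes the MipsAssembler declarations from assembler_mips.h (Ori/Addiu/Lui in their usual
// rt, rs/imm forms); helper names mirror those used in this diff.
void LoadConst32Sketch(mips::MipsAssembler* assembler, mips::Register rd, int32_t value) {
  if (IsUint<16>(value)) {
    assembler->Ori(rd, mips::ZERO, value);        // e.g. 0, 65535: one ori (zero-extended).
  } else if (IsInt<16>(value)) {
    assembler->Addiu(rd, mips::ZERO, value);      // e.g. -1, -32768: one addiu (sign-extended).
  } else {
    assembler->Lui(rd, High16Bits(value));        // Upper half via lui...
    if (Low16Bits(value) != 0) {
      assembler->Ori(rd, rd, Low16Bits(value));   // ...plus ori only when the low half is non-zero.
    }
  }
}

For instance, 65536 (0x10000) needs only the lui, while -65537 (0xFFFEFFFF) needs lui 65534 followed by ori 65535, matching the expected strings in the test above.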
+TEST_F(AssemblerMIPSTest, ImpossibleReordering) { + mips::MipsLabel label1, label2; + __ SetReorder(true); + + __ B(&label1); // No preceding or target instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label1); + __ B(&label1); // The preceding label prevents moving Addu into the delay slot. + __ B(&label1); // No preceding or target instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Beqz(mips::T0, &label1); // T0 dependency. + + __ Or(mips::T1, mips::T2, mips::T3); + __ Bne(mips::T2, mips::T1, &label1); // T1 dependency. + + __ And(mips::T0, mips::T1, mips::T2); + __ Blt(mips::T1, mips::T0, &label1); // T0 dependency. + + __ Xor(mips::AT, mips::T0, mips::T1); + __ Bge(mips::T1, mips::T0, &label1); // AT dependency. + + __ Subu(mips::T0, mips::T1, mips::AT); + __ Bltu(mips::T1, mips::T0, &label1); // AT dependency. + + __ ColtS(1, mips::F2, mips::F4); + __ Bc1t(1, &label1); // cc1 dependency. + + __ Move(mips::T0, mips::RA); + __ Bal(&label1); // RA dependency. + + __ Lw(mips::RA, mips::T0, 0); + __ Bal(&label1); // RA dependency. + + __ LlR2(mips::T9, mips::T0, 0); + __ Jalr(mips::T9); // T9 dependency. + + __ Sw(mips::RA, mips::T0, 0); + __ Jalr(mips::T9); // RA dependency. + + __ Lw(mips::T1, mips::T0, 0); + __ Jalr(mips::T1, mips::T9); // T1 dependency. + + __ ScR2(mips::T9, mips::T0, 0); + __ Jr(mips::T9); // T9 dependency. + + __ Bind(&label2); + + __ Bnez(mips::T0, &label2); // No preceding instruction for the delay slot. + + __ Bgeu(mips::T1, mips::T0, &label2); // No preceding instruction for the delay slot. + + __ Bc1f(2, &label2); // No preceding instruction for the delay slot. + + __ Bal(&label2); // No preceding instruction for the delay slot. + + __ Jalr(mips::T9); // No preceding instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ CodePosition(); // Drops the delay slot candidate (the last instruction). + __ Beq(mips::T1, mips::T2, &label2); // No preceding or target instruction for the delay slot. 
+ + std::string expected = + ".set noreorder\n" + "b 1f\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "1:\n" + "b 1b\n" + "nop\n" + "b 1b\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "beq $zero, $t0, 1b\n" + "nop\n" + + "or $t1, $t2, $t3\n" + "bne $t2, $t1, 1b\n" + "nop\n" + + "and $t0, $t1, $t2\n" + "slt $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "nop\n" + + "xor $at, $t0, $t1\n" + "slt $at, $t1, $t0\n" + "beq $zero, $at, 1b\n" + "nop\n" + + "subu $t0, $t1, $at\n" + "sltu $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "nop\n" + + "c.olt.s $fcc1, $f2, $f4\n" + "bc1t $fcc1, 1b\n" + "nop\n" + + "or $t0, $ra, $zero\n" + "bal 1b\n" + "nop\n" + + "lw $ra, 0($t0)\n" + "bal 1b\n" + "nop\n" + + "ll $t9, 0($t0)\n" + "jalr $t9\n" + "nop\n" + + "sw $ra, 0($t0)\n" + "jalr $t9\n" + "nop\n" + + "lw $t1, 0($t0)\n" + "jalr $t1, $t9\n" + "nop\n" + + "sc $t9, 0($t0)\n" + "jalr $zero, $t9\n" + "nop\n" + + "2:\n" + + "bne $zero, $t0, 2b\n" + "nop\n" + + "sltu $at, $t1, $t0\n" + "beq $zero, $at, 2b\n" + "nop\n" + + "bc1f $fcc2, 2b\n" + "nop\n" + + "bal 2b\n" + "nop\n" + + "jalr $t9\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "beq $t1, $t2, 2b\n" + "nop\n"; + DriverStr(expected, "ImpossibleReordering"); +} + +TEST_F(AssemblerMIPSTest, Reordering) { + mips::MipsLabel label1, label2; + __ SetReorder(true); + + __ Bind(&label1); + __ Bind(&label2); + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Beqz(mips::T1, &label1); + + __ Or(mips::T1, mips::T2, mips::T3); + __ Bne(mips::T2, mips::T3, &label1); + + __ And(mips::T0, mips::T1, mips::T2); + __ Blt(mips::T1, mips::T2, &label1); + + __ Xor(mips::T2, mips::T0, mips::T1); + __ Bge(mips::T1, mips::T0, &label1); + + __ Subu(mips::T2, mips::T1, mips::T0); + __ Bltu(mips::T1, mips::T0, &label1); + + __ ColtS(0, mips::F2, mips::F4); + __ Bc1t(1, &label1); + + __ Move(mips::T0, mips::T1); + __ Bal(&label1); + + __ LlR2(mips::T1, mips::T0, 0); + __ Jalr(mips::T9); + + __ ScR2(mips::T1, mips::T0, 0); + __ Jr(mips::T9); + + std::string expected = + ".set noreorder\n" + "1:\n" + + "beq $zero, $t1, 1b\n" + "addu $t0, $t1, $t2\n" + + "bne $t2, $t3, 1b\n" + "or $t1, $t2, $t3\n" + + "slt $at, $t1, $t2\n" + "bne $zero, $at, 1b\n" + "and $t0, $t1, $t2\n" + + "slt $at, $t1, $t0\n" + "beq $zero, $at, 1b\n" + "xor $t2, $t0, $t1\n" + + "sltu $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "subu $t2, $t1, $t0\n" + + "bc1t $fcc1, 1b\n" + "c.olt.s $fcc0, $f2, $f4\n" + + "bal 1b\n" + "or $t0, $t1, $zero\n" + + "jalr $t9\n" + "ll $t1, 0($t0)\n" + + "jalr $zero, $t9\n" + "sc $t1, 0($t0)\n"; + DriverStr(expected, "Reordering"); +} + +TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) { + mips::MipsLabel label1, label2, label3, label4, label5, label6; + __ SetReorder(true); + + __ B(&label1); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + + __ Bind(&label2); + __ Xor(mips::T0, mips::T1, mips::T2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label3); // Prevents reordering ADDU above with B below. + __ B(&label2); + + __ B(&label4); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ CodePosition(); // Prevents absorbing ADDU above. + + __ B(&label5); + __ Bind(&label5); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label6); + __ CodePosition(); // Even across Bind(), CodePosition() prevents absorbing the ADDU above. 
+ + std::string expected = + ".set noreorder\n" + "b 1f\n" + "addu $t0, $t1, $t2\n" + "addu $t0, $t1, $t2\n" + "1:\n" + + "xor $t0, $t1, $t2\n" + "2:\n" + "addu $t0, $t1, $t2\n" + "b 2b\n" + "xor $t0, $t1, $t2\n" + + "b 4f\n" + "nop\n" + "4:\n" + "addu $t0, $t1, $t2\n" + + "b 5f\n" + "nop\n" + "5:\n" + "addu $t0, $t1, $t2\n"; + DriverStr(expected, "AbsorbTargetInstruction"); +} + +TEST_F(AssemblerMIPSTest, SetReorder) { + mips::MipsLabel label1, label2, label3, label4, label5, label6; + + __ SetReorder(true); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label1); + __ B(&label5); + __ B(&label6); + + __ SetReorder(false); + __ Bind(&label2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label2); + __ B(&label5); + __ B(&label6); + + __ SetReorder(true); + __ Bind(&label3); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label3); + __ B(&label5); + __ B(&label6); + + __ SetReorder(false); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label4); + __ B(&label5); + __ B(&label6); + + __ SetReorder(true); + __ Bind(&label5); + __ Subu(mips::T0, mips::T1, mips::T2); + + __ SetReorder(false); + __ Bind(&label6); + __ Xor(mips::T0, mips::T1, mips::T2); + + std::string expected = + ".set noreorder\n" + "1:\n" + "b 1b\n" + "addu $t0, $t1, $t2\n" + "b 55f\n" + "subu $t0, $t1, $t2\n" + "b 6f\n" + "nop\n" + + "2:\n" + "addu $t0, $t1, $t2\n" + "b 2b\n" + "nop\n" + "b 5f\n" + "nop\n" + "b 6f\n" + "nop\n" + + "3:\n" + "b 3b\n" + "addu $t0, $t1, $t2\n" + "b 55f\n" + "subu $t0, $t1, $t2\n" + "b 6f\n" + "nop\n" + + "4:\n" + "addu $t0, $t1, $t2\n" + "b 4b\n" + "nop\n" + "b 5f\n" + "nop\n" + "b 6f\n" + "nop\n" + + "5:\n" + "subu $t0, $t1, $t2\n" + "55:\n" + "6:\n" + "xor $t0, $t1, $t2\n"; + DriverStr(expected, "SetReorder"); +} + +TEST_F(AssemblerMIPSTest, LongBranchReorder) { + mips::MipsLabel label; + __ SetReorder(true); + __ Subu(mips::T0, mips::T1, mips::T2); + __ B(&label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Subu(mips::T0, mips::T1, mips::T2); + __ B(&label); + + // Account for 5 extra instructions: ori, addu, lw, jalr, addiu. + uint32_t offset_forward = (kAdduCount1 + 5) * sizeof(uint32_t); + // Account for 5 extra instructions: subu, addiu, sw, nal, lui. 
+ uint32_t offset_back = -(kAdduCount1 + 5) * sizeof(uint32_t); + + std::ostringstream oss; + oss << + ".set noreorder\n" + "subu $t0, $t1, $t2\n" + "addiu $sp, $sp, -4\n" + "sw $ra, 0($sp)\n" + "bltzal $zero, .+4\n" + "lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "ori $at, $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "addu $at, $at, $ra\n" + "lw $ra, 0($sp)\n" + "jalr $zero, $at\n" + "addiu $sp, $sp, 4\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "subu $t0, $t1, $t2\n" + "addiu $sp, $sp, -4\n" + "sw $ra, 0($sp)\n" + "bltzal $zero, .+4\n" + "lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "ori $at, $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "addu $at, $at, $ra\n" + "lw $ra, 0($sp)\n" + "jalr $zero, $at\n" + "addiu $sp, $sp, 4\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBranchReorder"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h index 5e7ed11f51..66204e70e3 100644 --- a/compiler/utils/mips/managed_register_mips.h +++ b/compiler/utils/mips/managed_register_mips.h @@ -87,70 +87,70 @@ const int kNumberOfAllocIds = // There is a one-to-one mapping between ManagedRegister and register id. class MipsManagedRegister : public ManagedRegister { public: - Register AsCoreRegister() const { + constexpr Register AsCoreRegister() const { CHECK(IsCoreRegister()); return static_cast<Register>(id_); } - FRegister AsFRegister() const { + constexpr FRegister AsFRegister() const { CHECK(IsFRegister()); return static_cast<FRegister>(id_ - kNumberOfCoreRegIds); } - DRegister AsDRegister() const { + constexpr DRegister AsDRegister() const { CHECK(IsDRegister()); return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfFRegIds); } - FRegister AsOverlappingDRegisterLow() const { + constexpr FRegister AsOverlappingDRegisterLow() const { CHECK(IsOverlappingDRegister()); DRegister d_reg = AsDRegister(); return static_cast<FRegister>(d_reg * 2); } - FRegister AsOverlappingDRegisterHigh() const { + constexpr FRegister AsOverlappingDRegisterHigh() const { CHECK(IsOverlappingDRegister()); DRegister d_reg = AsDRegister(); return static_cast<FRegister>(d_reg * 2 + 1); } - Register AsRegisterPairLow() const { + constexpr Register AsRegisterPairLow() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdLow(). return FromRegId(AllocIdLow()).AsCoreRegister(); } - Register AsRegisterPairHigh() const { + constexpr Register AsRegisterPairHigh() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdHigh(). return FromRegId(AllocIdHigh()).AsCoreRegister(); } - bool IsCoreRegister() const { + constexpr bool IsCoreRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfCoreRegIds); } - bool IsFRegister() const { + constexpr bool IsFRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfCoreRegIds; return (0 <= test) && (test < kNumberOfFRegIds); } - bool IsDRegister() const { + constexpr bool IsDRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds); return (0 <= test) && (test < kNumberOfDRegIds); } // Returns true if this DRegister overlaps FRegisters. 
- bool IsOverlappingDRegister() const { + constexpr bool IsOverlappingDRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds); return (0 <= test) && (test < kNumberOfOverlappingDRegIds); } - bool IsRegisterPair() const { + constexpr bool IsRegisterPair() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds); @@ -164,32 +164,32 @@ class MipsManagedRegister : public ManagedRegister { // then false is returned. bool Overlaps(const MipsManagedRegister& other) const; - static MipsManagedRegister FromCoreRegister(Register r) { + static constexpr MipsManagedRegister FromCoreRegister(Register r) { CHECK_NE(r, kNoRegister); return FromRegId(r); } - static MipsManagedRegister FromFRegister(FRegister r) { + static constexpr MipsManagedRegister FromFRegister(FRegister r) { CHECK_NE(r, kNoFRegister); return FromRegId(r + kNumberOfCoreRegIds); } - static MipsManagedRegister FromDRegister(DRegister r) { + static constexpr MipsManagedRegister FromDRegister(DRegister r) { CHECK_NE(r, kNoDRegister); return FromRegId(r + kNumberOfCoreRegIds + kNumberOfFRegIds); } - static MipsManagedRegister FromRegisterPair(RegisterPair r) { + static constexpr MipsManagedRegister FromRegisterPair(RegisterPair r) { CHECK_NE(r, kNoRegisterPair); return FromRegId(r + (kNumberOfCoreRegIds + kNumberOfFRegIds + kNumberOfDRegIds)); } private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } - int RegId() const { + constexpr int RegId() const { CHECK(!IsNoRegister()); return id_; } @@ -205,9 +205,9 @@ class MipsManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr MipsManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static MipsManagedRegister FromRegId(int reg_id) { + static constexpr MipsManagedRegister FromRegId(int reg_id) { MipsManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -218,7 +218,7 @@ std::ostream& operator<<(std::ostream& os, const MipsManagedRegister& reg); } // namespace mips -inline mips::MipsManagedRegister ManagedRegister::AsMips() const { +constexpr inline mips::MipsManagedRegister ManagedRegister::AsMips() const { mips::MipsManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ab480cafd5..57223b52a3 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -26,15 +26,23 @@ namespace art { namespace mips64 { +static_assert(static_cast<size_t>(kMips64PointerSize) == kMips64DoublewordSize, + "Unexpected Mips64 pointer size."); +static_assert(kMips64PointerSize == PointerSize::k64, "Unexpected Mips64 pointer size."); + + void Mips64Assembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + ReserveJumpTableSpace(); + EmitLiterals(); PromoteBranches(); } void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { EmitBranches(); + EmitJumpTables(); Assembler::FinalizeInstructions(region); PatchCFI(); } @@ -176,6 +184,122 @@ void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) Emit(encoding); } +void Mips64Assembler::EmitMsa3R(int operation, + int df, + 
VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wt, kNoVectorRegister); + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + static_cast<uint32_t>(wt) << kWtShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaBIT(int operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df_m << kDfMShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaELMOperationShift | + df_n << kDfNShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaMI10(int s10, + GpuRegister rs, + VectorRegister wd, + int minor_opcode, + int df) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(s10)) << s10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + s10 << kS10Shift | + static_cast<uint32_t>(rs) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode << kS10MinorShift | + df; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaI10(int operation, + int df, + int i10, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(i10)) << i10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + i10 << kI10Shift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2R(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2ROperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2RFOperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x21); } @@ -313,6 +437,18 @@ void Mips64Assembler::Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size) { EmitR(0x1f, rs, rt, static_cast<GpuRegister>(pos + size - 33), pos - 32, 0x6); } +void Mips64Assembler::Lsa(GpuRegister rd, GpuRegister rs, GpuRegister 
rt, int saPlusOne) { + CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; + int sa = saPlusOne - 1; + EmitR(0x0, rs, rt, rd, sa, 0x05); +} + +void Mips64Assembler::Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne) { + CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne; + int sa = saPlusOne - 1; + EmitR(0x0, rs, rt, rd, sa, 0x15); +} + void Mips64Assembler::Wsbh(GpuRegister rd, GpuRegister rt) { EmitRtd(0x1f, rt, rd, 2, 0x20); } @@ -445,10 +581,34 @@ void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x27, rs, rt, imm16); } +void Mips64Assembler::Lwpc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x01 << 19) | imm19); +} + +void Mips64Assembler::Lwupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x02 << 19) | imm19); +} + +void Mips64Assembler::Ldpc(GpuRegister rs, uint32_t imm18) { + CHECK(IsUint<18>(imm18)) << imm18; + EmitI21(0x3B, rs, (0x06 << 18) | imm18); +} + void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } +void Mips64Assembler::Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xf, rs, rt, imm16); +} + +void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + CHECK_NE(rs, ZERO); + EmitI(0x1d, rs, rt, imm16); +} + void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) { EmitI(1, rs, static_cast<GpuRegister>(6), imm16); } @@ -543,6 +703,10 @@ void Mips64Assembler::Bc(uint32_t imm26) { EmitI26(0x32, imm26); } +void Mips64Assembler::Balc(uint32_t imm26) { + EmitI26(0x3A, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -1032,133 +1196,514 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { Nor(rd, rs, ZERO); } +void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe); +} + 
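All of the MSA three-register arithmetic emitters above (AndV/OrV/NorV/XorV, AddvB..D, SubvB..D, MulvB..D and the Div_*/Mod_* variants) funnel into EmitMsa3R, which packs the operation, the data format (byte/halfword/word/doubleword) and the three vector registers into a single 32-bit instruction word. The sketch below shows that packing; the bit positions are assumed from the published MIPS MSA 3R format rather than taken from this diff, where the real code uses the named shift constants kOpcodeShift, kMsaOperationShift, kDfShift, kWtShift, kWsShift and kWdShift.

#include <cstdint>

// Assumed MSA 3R field layout (not from this diff):
//   31..26 major opcode, 25..23 operation, 22..21 df, 20..16 wt, 15..11 ws, 10..6 wd, 5..0 minor opcode.
constexpr uint32_t kMsaMajorOpcodeGuess = 0x1e;  // Assumed value of kMsaMajorOpcode.

uint32_t PackMsa3R(uint32_t operation, uint32_t df,
                   uint32_t wt, uint32_t ws, uint32_t wd,
                   uint32_t minor_opcode) {
  return (kMsaMajorOpcodeGuess << 26) |
         (operation << 23) |
         (df << 21) |            // 0 = byte, 1 = halfword, 2 = word, 3 = doubleword.
         (wt << 16) |
         (ws << 11) |
         (wd << 6) |
         minor_opcode;
}

// Under these assumptions, AddvW(W0, W1, W2) above corresponds to
// PackMsa3R(/*operation*/ 0x0, /*df*/ 0x2, /*wt*/ 2, /*ws*/ 1, /*wd*/ 0, /*minor*/ 0xe).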
+void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b); +} + +void 
Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + 
CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(HasMsa()); + CHECK(IsUint<4>(n4)) << n4; + EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(HasMsa()); + CHECK(IsUint<3>(n3)) << n3; + EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(HasMsa()); + CHECK(IsUint<2>(n2)) << n2; + EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(HasMsa()); + CHECK(IsUint<1>(n1)) << n1; + EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19); +} + +void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void 
Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); + EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::LdiB(VectorRegister wd, int imm8) { + CHECK(HasMsa()); + CHECK(IsInt<8>(imm8)) << imm8; + EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiH(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiW(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiD(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0); +} + +void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1); +} + +void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2); +} + +void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3); +} + +void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0); +} + +void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1); +} + +void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2); +} + +void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); +} + void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { - if (IsUint<16>(value)) { - // Use OR with (unsigned) immediate to encode 16b unsigned int. - Ori(rd, ZERO, value); - } else if (IsInt<16>(value)) { - // Use ADD with (signed) immediate to encode 16b signed int. - Addiu(rd, ZERO, value); - } else { - Lui(rd, value >> 16); - if (value & 0xFFFF) - Ori(rd, rd, value); - } + TemplateLoadConst32(this, rd, value); +} + +// This function is only used for testing purposes. +void Mips64Assembler::RecordLoadConst64Path(int value ATTRIBUTE_UNUSED) { } void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { - int bit31 = (value & UINT64_C(0x80000000)) != 0; - - // Loads with 1 instruction. 
- if (IsUint<16>(value)) { - Ori(rd, ZERO, value); - } else if (IsInt<16>(value)) { - Daddiu(rd, ZERO, value); - } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) { - Lui(rd, value >> 16); - } else if (IsInt<32>(value)) { - // Loads with 2 instructions. - Lui(rd, value >> 16); - Ori(rd, rd, value); - } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) { - Ori(rd, ZERO, value); - Dahi(rd, value >> 32); - } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) { - Ori(rd, ZERO, value); - Dati(rd, value >> 48); - } else if ((value & 0xFFFF) == 0 && - (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) { - Lui(rd, value >> 16); - Dahi(rd, (value >> 32) + bit31); - } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) { - Lui(rd, value >> 16); - Dati(rd, (value >> 48) + bit31); - } else if (IsPowerOfTwo(value + UINT64_C(1))) { - int shift_cnt = 64 - CTZ(value + UINT64_C(1)); - Daddiu(rd, ZERO, -1); - if (shift_cnt < 32) { - Dsrl(rd, rd, shift_cnt); - } else { - Dsrl32(rd, rd, shift_cnt & 31); - } + TemplateLoadConst64(this, rd, value); +} + +void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value) { + if (IsInt<16>(value)) { + Addiu(rt, rs, value); } else { - int shift_cnt = CTZ(value); - int64_t tmp = value >> shift_cnt; - if (IsUint<16>(tmp)) { - Ori(rd, ZERO, tmp); - if (shift_cnt < 32) { - Dsll(rd, rd, shift_cnt); - } else { - Dsll32(rd, rd, shift_cnt & 31); - } - } else if (IsInt<16>(tmp)) { - Daddiu(rd, ZERO, tmp); - if (shift_cnt < 32) { - Dsll(rd, rd, shift_cnt); - } else { - Dsll32(rd, rd, shift_cnt & 31); - } - } else if (IsInt<32>(tmp)) { - // Loads with 3 instructions. - Lui(rd, tmp >> 16); - Ori(rd, rd, tmp); - if (shift_cnt < 32) { - Dsll(rd, rd, shift_cnt); - } else { - Dsll32(rd, rd, shift_cnt & 31); - } - } else { - shift_cnt = 16 + CTZ(value >> 16); - tmp = value >> shift_cnt; - if (IsUint<16>(tmp)) { - Ori(rd, ZERO, tmp); - if (shift_cnt < 32) { - Dsll(rd, rd, shift_cnt); - } else { - Dsll32(rd, rd, shift_cnt & 31); - } - Ori(rd, rd, value); - } else if (IsInt<16>(tmp)) { - Daddiu(rd, ZERO, tmp); - if (shift_cnt < 32) { - Dsll(rd, rd, shift_cnt); - } else { - Dsll32(rd, rd, shift_cnt & 31); - } - Ori(rd, rd, value); - } else { - // Loads with 3-4 instructions. - uint64_t tmp2 = value; - bool used_lui = false; - if (((tmp2 >> 16) & 0xFFFF) != 0 || (tmp2 & 0xFFFFFFFF) == 0) { - Lui(rd, tmp2 >> 16); - used_lui = true; - } - if ((tmp2 & 0xFFFF) != 0) { - if (used_lui) { - Ori(rd, rd, tmp2); - } else { - Ori(rd, ZERO, tmp2); - } - } - if (bit31) { - tmp2 += UINT64_C(0x100000000); - } - if (((tmp2 >> 32) & 0xFFFF) != 0) { - Dahi(rd, tmp2 >> 32); - } - if (tmp2 & UINT64_C(0x800000000000)) { - tmp2 += UINT64_C(0x1000000000000); - } - if ((tmp2 >> 48) != 0) { - Dati(rd, tmp2 >> 48); - } - } + int16_t high = High16Bits(value); + int16_t low = Low16Bits(value); + high += (low < 0) ? 1 : 0; // Account for sign extension in addiu. + Aui(rt, rs, high); + if (low != 0) { + Addiu(rt, rt, low); } } } +// TODO: don't use rtmp, use daui, dahi, dati. void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { + CHECK_NE(rs, rtmp); if (IsInt<16>(value)) { Daddiu(rt, rs, value); } else { @@ -1173,19 +1718,37 @@ void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBit type_ = (offset_size <= branch_info_[short_type].offset_size) ? 
short_type : long_type; } -void Mips64Assembler::Branch::InitializeType(bool is_call) { +void Mips64Assembler::Branch::InitializeType(Type initial_type) { OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); - if (is_call) { - InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); - } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; - } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); - } + switch (initial_type) { + case kLabel: + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + CHECK(!IsResolved()); + type_ = initial_type; + break; + case kCall: + InitShortOrLong(offset_size, kCall, kLongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } old_type_ = type_; } @@ -1218,14 +1781,14 @@ bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, } } -Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(ZERO), rhs_reg_(ZERO), condition_(kUncond) { - InitializeType(false); + InitializeType(is_call ? kCall : kCondBranch); } Mips64Assembler::Branch::Branch(uint32_t location, @@ -1273,19 +1836,18 @@ Mips64Assembler::Branch::Branch(uint32_t location, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(false); + InitializeType(kCondBranch); } -Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) +Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), + target_(kUnresolved), + lhs_reg_(dest_reg), rhs_reg_(ZERO), condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true); + CHECK_NE(dest_reg, ZERO); + InitializeType(label_or_literal_type); } Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( @@ -1387,11 +1949,23 @@ bool Mips64Assembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // Near label. + case kLabel: + // Near literals. + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: return false; // Long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // Far label. + case kFarLabel: + // Far literals. + case kFarLiteral: + case kFarLiteralUnsigned: + case kFarLiteralLong: return true; } UNREACHABLE(); @@ -1460,6 +2034,20 @@ void Mips64Assembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // Near label. + case kLabel: + type_ = kFarLabel; + break; + // Near literals. 
+ case kLiteral: + type_ = kFarLiteral; + break; + case kLiteralUnsigned: + type_ = kFarLiteralUnsigned; + break; + case kLiteralLong: + type_ = kFarLiteralLong; + break; default: // Note: 'type_' is already long. break; @@ -1506,7 +2094,15 @@ uint32_t Mips64Assembler::Branch::GetOffset() const { uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. - uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset_location = GetOffsetLocation(); + if (type_ == kLiteralLong) { + // Special case for the ldpc instruction, whose address (PC) is rounded down to + // a multiple of 8 before adding the offset. + // Note, branch promotion has already taken care of aligning `target_` to an + // address that's a multiple of 8. + offset_location = RoundDown(offset_location, sizeof(uint64_t)); + } + uint32_t offset = target_ - offset_location - branch_info_[type_].pc_org * sizeof(uint32_t); // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1553,7 +2149,7 @@ void Mips64Assembler::Bind(Mips64Label* label) { label->BindTo(bound_pc); } -uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { +uint32_t Mips64Assembler::GetLabelLocation(const Mips64Label* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1609,7 +2205,7 @@ void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { void Mips64Assembler::Buncond(Mips64Label* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ false); FinalizeLabeledBranch(label); } @@ -1626,12 +2222,148 @@ void Mips64Assembler::Bcond(Mips64Label* label, FinalizeLabeledBranch(label); } -void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { +void Mips64Assembler::Call(Mips64Label* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, indirect_reg); + branches_.emplace_back(buffer_.Size(), target, /* is_call */ true); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label) { + // Label address loads are treated as pseudo branches since they require very similar handling. + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), dest_reg, Branch::kLabel); FinalizeLabeledBranch(label); } +Literal* Mips64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +void Mips64Assembler::LoadLiteral(GpuRegister dest_reg, + LoadOperandType load_type, + Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. 
+ Branch::Type literal_type; + switch (load_type) { + case kLoadWord: + DCHECK_EQ(literal->GetSize(), 4u); + literal_type = Branch::kLiteral; + break; + case kLoadUnsignedWord: + DCHECK_EQ(literal->GetSize(), 4u); + literal_type = Branch::kLiteralUnsigned; + break; + case kLoadDoubleword: + DCHECK_EQ(literal->GetSize(), 8u); + literal_type = Branch::kLiteralLong; + break; + default: + LOG(FATAL) << "Unexpected literal load type " << load_type; + UNREACHABLE(); + } + Mips64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), dest_reg, literal_type); + FinalizeLabeledBranch(label); +} + +JumpTable* Mips64Assembler::CreateJumpTable(std::vector<Mips64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +void Mips64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Mips64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with dummy data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; i++) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Mips64Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + Mips64Label* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (Mips64Label* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + +void Mips64Assembler::EmitLiterals() { + if (!literals_.empty()) { + for (Literal& literal : literals_) { + Mips64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 4u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } + if (!long_literals_.empty()) { + // Reserve 4 bytes for potential alignment. If after the branch promotion the 64-bit + // literals don't end up 8-byte-aligned, they will be moved down 4 bytes. + Emit(0); // NOP. + for (Literal& literal : long_literals_) { + Mips64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + void Mips64Assembler::PromoteBranches() { // Promote short branches to long as necessary. 
bool changed; @@ -1670,6 +2402,35 @@ void Mips64Assembler::PromoteBranches() { end = branch.GetOldLocation(); } } + + // Align 64-bit literals by moving them down by 4 bytes if needed. + // This will reduce the PC-relative distance, which should be safe for both near and far literals. + if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. + CHECK_EQ(first_literal_location + lit_size, buf_size); + if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) { + buffer_.Move(first_literal_location - sizeof(uint32_t), first_literal_location, lit_size); + // The 4 reserved bytes proved useless, reduce the buffer size. + buffer_.Resize(buf_size - sizeof(uint32_t)); + // Reduce target addresses in literal and address loads by 4 bytes in order for correct + // offsets from PC to be generated. + for (auto& branch : branches_) { + uint32_t target = branch.GetTarget(); + if (target >= first_literal_location) { + branch.Resolve(target - sizeof(uint32_t)); + } + } + // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal, + // we need to adjust the location of the literal's label as well. + for (Literal& literal : long_literals_) { + // Bound label's position is negative, hence incrementing it instead of decrementing. + literal.GetLabel()->position_ += sizeof(uint32_t); + } + } + } } // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. @@ -1678,11 +2439,23 @@ const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch // Exception: kOffset23 for beqzc/bnezc - { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kCall + // Near label. + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLabel + // Near literals. + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLiteral + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLiteralUnsigned + { 1, 0, 0, Mips64Assembler::Branch::kOffset21, 3 }, // kLiteralLong // Long branches. { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch - { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall + // Far label. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLabel + // Far literals. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteral + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteralUnsigned + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kFarLiteralLong }; // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. @@ -1706,8 +2479,26 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { break; case Branch::kCall: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Balc(offset); + break; + + // Near label. + case Branch::kLabel: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Addiupc(lhs, offset); - Jialc(lhs, 0); + break; + // Near literals. 
+ case Branch::kLiteral: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); + break; + case Branch::kLiteralUnsigned: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwupc(lhs, offset); + break; + case Branch::kLiteralLong: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Ldpc(lhs, offset); break; // Long branches. @@ -1725,11 +2516,37 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // Far label. + case Branch::kFarLabel: offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Daddiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Daddiu(lhs, AT, Low16Bits(offset)); + break; + // Far literals. + case Branch::kFarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); + break; + case Branch::kFarLiteralUnsigned: + offset += (offset & 0x8000) << 1; // Account for sign extension in lwu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lwu(lhs, AT, Low16Bits(offset)); + break; + case Branch::kFarLiteralLong: + offset += (offset & 0x8000) << 1; // Account for sign extension in ld. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Ld(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -1740,8 +2557,8 @@ void Mips64Assembler::Bc(Mips64Label* label) { Buncond(label); } -void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { - Call(label, indirect_reg); +void Mips64Assembler::Balc(Mips64Label* label) { + Call(label); } void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { @@ -1800,80 +2617,103 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, - int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); +void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base, + int32_t& offset, + bool is_doubleword) { + // This method is used to adjust the base register and offset pair + // for a load/store when the offset doesn't fit into int16_t. + // It is assumed that `base + offset` is sufficiently aligned for memory + // operands that are machine word in size or smaller. For doubleword-sized + // operands it's assumed that `base` is a multiple of 8, while `offset` + // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments + // and spilled variables on the stack accessed relative to the stack + // pointer register). + // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8. 
+ CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`. + + bool doubleword_aligned = IsAligned<kMips64DoublewordSize>(offset); + bool two_accesses = is_doubleword && !doubleword_aligned; + + // IsInt<16> must be passed a signed value, hence the static cast below. + if (IsInt<16>(offset) && + (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t. + return; } - switch (type) { - case kLoadSignedByte: - Lb(reg, base, offset); - break; - case kLoadUnsignedByte: - Lbu(reg, base, offset); - break; - case kLoadSignedHalfword: - Lh(reg, base, offset); - break; - case kLoadUnsignedHalfword: - Lhu(reg, base, offset); - break; - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lw(reg, base, offset); - break; - case kLoadUnsignedWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - Lwu(TMP2, base, offset + kMips64WordSize); - Dinsu(reg, TMP2, 32, 32); - } else { - Ld(reg, base, offset); - } - break; - } -} + // Remember the "(mis)alignment" of `offset`, it will be checked at the end. + uint32_t misalignment = offset & (kMips64DoublewordSize - 1); + + // First, see if `offset` can be represented as a sum of two 16-bit signed + // offsets. This can save an instruction. + // To simplify matters, only do this for a symmetric range of offsets from + // about -64KB to about +64KB, allowing further addition of 4 when accessing + // 64-bit variables with two 32-bit accesses. + constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8; // Max int16_t that's a multiple of 8. + constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment; + + if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) { + Daddiu(AT, base, kMinOffsetForSimpleAdjustment); + offset -= kMinOffsetForSimpleAdjustment; + } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) { + Daddiu(AT, base, -kMinOffsetForSimpleAdjustment); + offset += kMinOffsetForSimpleAdjustment; + } else { + // In more complex cases take advantage of the daui instruction, e.g.: + // daui AT, base, offset_high + // [dahi AT, 1] // When `offset` is close to +2GB. + // lw reg_lo, offset_low(AT) + // [lw reg_hi, (offset_low+4)(AT)] // If misaligned 64-bit load. 
+ // or when offset_low+4 overflows int16_t: + // daui AT, base, offset_high + // daddiu AT, AT, 8 + // lw reg_lo, (offset_low-8)(AT) + // lw reg_hi, (offset_low-4)(AT) + int16_t offset_low = Low16Bits(offset); + int32_t offset_low32 = offset_low; + int16_t offset_high = High16Bits(offset); + bool increment_hi16 = offset_low < 0; + bool overflow_hi16 = false; + + if (increment_hi16) { + offset_high++; + overflow_hi16 = (offset_high == -32768); + } + Daui(AT, base, offset_high); -void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, - int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); + if (overflow_hi16) { + Dahi(AT, 1); + } + + if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low32 + kMips64WordSize))) { + // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4. + Daddiu(AT, AT, kMips64DoublewordSize); + offset_low32 -= kMips64DoublewordSize; + } + + offset = offset_low32; } + base = AT; - switch (type) { - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - Lw(TMP2, base, offset + kMips64WordSize); - Mthc1(TMP2, reg); - } else { - Ldc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; + CHECK(IsInt<16>(offset)); + if (two_accesses) { + CHECK(IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize))); } + CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1)); +} + +void Mips64Assembler::LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset) { + LoadFromOffset<>(type, reg, base, offset); +} + +void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset) { + LoadFpuFromOffset<>(type, reg, base, offset); } void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, @@ -1903,72 +2743,18 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, } } -void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, +void Mips64Assembler::StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreByte: - Sb(reg, base, offset); - break; - case kStoreHalfword: - Sh(reg, base, offset); - break; - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - Dsrl32(TMP2, reg, 0); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sd(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreToOffset<>(type, reg, base, offset); } -void 
Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, +void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Swc1(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Mfhc1(TMP2, reg); - Swc1(reg, base, offset); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sdc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreFpuToOffset<>(type, reg, base, offset); } static dwarf::Reg DWARFReg(GpuRegister reg) { @@ -1977,8 +2763,9 @@ static dwarf::Reg DWARFReg(GpuRegister reg) { constexpr size_t kFramePointerSize = 8; -void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, +void Mips64Assembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); DCHECK(!overwriting_); @@ -1992,7 +2779,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; - GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); + GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister(); StoreToOffset(kStoreDoubleword, reg, SP, stack_offset); cfi_.RelOffset(DWARFReg(reg), stack_offset); } @@ -2003,7 +2790,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Write out entry spills. 
int32_t offset = frame_size + kFramePointerSize; for (size_t i = 0; i < entry_spills.size(); ++i) { - Mips64ManagedRegister reg = entry_spills.at(i).AsMips64(); + Mips64ManagedRegister reg = entry_spills[i].AsMips64(); ManagedRegisterSpill spill = entry_spills.at(i); int32_t size = spill.getSize(); if (reg.IsNoRegister()) { @@ -2022,7 +2809,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } void Mips64Assembler::RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& callee_save_regs) { + ArrayRef<const ManagedRegister> callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); DCHECK(!overwriting_); cfi_.RememberState(); @@ -2030,7 +2817,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { - GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); + GpuRegister reg = callee_save_regs[i].AsMips64().AsGpuRegister(); LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset); cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; @@ -2109,16 +2896,16 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { +void Mips64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -2135,9 +2922,7 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, - ThreadOffset<kMips64DoublewordSize> src, - size_t size) { +void Mips64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -2153,12 +2938,8 @@ void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, Membe CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - // TODO: review - // Negate the 32-bit ref - Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); - // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64 - Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsGpuRegister()); } } @@ -2170,8 +2951,7 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, base.AsMips64().AsGpuRegister(), offs.Int32Value()); } -void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<kMips64DoublewordSize> offs) { +void 
Mips64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); @@ -2215,18 +2995,18 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<kMips64DoublewordSize> thr_offs, - ManagedRegister mscratch) { +void Mips64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, + ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { +void Mips64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), @@ -2428,8 +3208,8 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset ATTRIBUTE_UNUSED, - ManagedRegister mscratch ATTRIBUTE_UNUSED) { +void Mips64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } @@ -2448,7 +3228,7 @@ void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjus LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, - Thread::ExceptionOffset<kMips64DoublewordSize>().Int32Value()); + Thread::ExceptionOffset<kMips64PointerSize>().Int32Value()); Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); } @@ -2465,7 +3245,7 @@ void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pDeliverException).Int32Value()); + QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, pDeliverException).Int32Value()); Jr(T9); Nop(); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 8acc38ac82..666c6935a1 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,20 +17,257 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <deque> #include <utility> #include <vector> +#include "arch/mips64/instruction_set_features_mips64.h" +#include "base/arena_containers.h" +#include "base/enums.h" #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" #include "offsets.h" #include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" #include "utils/label.h" namespace art { namespace mips64 { +enum LoadConst64Path { + kLoadConst64PathZero = 0x0, + kLoadConst64PathOri = 0x1, + kLoadConst64PathDaddiu = 0x2, + kLoadConst64PathLui = 0x4, + kLoadConst64PathLuiOri = 0x8, + kLoadConst64PathOriDahi = 0x10, + kLoadConst64PathOriDati = 0x20, + 
kLoadConst64PathLuiDahi = 0x40, + kLoadConst64PathLuiDati = 0x80, + kLoadConst64PathDaddiuDsrlX = 0x100, + kLoadConst64PathOriDsllX = 0x200, + kLoadConst64PathDaddiuDsllX = 0x400, + kLoadConst64PathLuiOriDsllX = 0x800, + kLoadConst64PathOriDsllXOri = 0x1000, + kLoadConst64PathDaddiuDsllXOri = 0x2000, + kLoadConst64PathDaddiuDahi = 0x4000, + kLoadConst64PathDaddiuDati = 0x8000, + kLoadConst64PathDinsu1 = 0x10000, + kLoadConst64PathDinsu2 = 0x20000, + kLoadConst64PathCatchAll = 0x40000, + kLoadConst64PathAllPaths = 0x7ffff, +}; + +template <typename Asm> +void TemplateLoadConst32(Asm* a, GpuRegister rd, int32_t value) { + if (IsUint<16>(value)) { + // Use OR with (unsigned) immediate to encode 16b unsigned int. + a->Ori(rd, ZERO, value); + } else if (IsInt<16>(value)) { + // Use ADD with (signed) immediate to encode 16b signed int. + a->Addiu(rd, ZERO, value); + } else { + // Set 16 most significant bits of value. The "lui" instruction + // also clears the 16 least significant bits to zero. + a->Lui(rd, value >> 16); + if (value & 0xFFFF) { + // If the 16 least significant bits are non-zero, set them + // here. + a->Ori(rd, rd, value); + } + } +} + +static inline int InstrCountForLoadReplicatedConst32(int64_t value) { + int32_t x = Low32Bits(value); + int32_t y = High32Bits(value); + + if (x == y) { + return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0 && IsInt<16>(value >> 16))) ? 2 : 3; + } + + return INT_MAX; +} + +template <typename Asm, typename Rtype, typename Vtype> +void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) { + int bit31 = (value & UINT64_C(0x80000000)) != 0; + int rep32_count = InstrCountForLoadReplicatedConst32(value); + + // Loads with 1 instruction. + if (IsUint<16>(value)) { + // 64-bit value can be loaded as an unsigned 16-bit number. + a->RecordLoadConst64Path(kLoadConst64PathOri); + a->Ori(rd, ZERO, value); + } else if (IsInt<16>(value)) { + // 64-bit value can be loaded as a signed 16-bit number. + a->RecordLoadConst64Path(kLoadConst64PathDaddiu); + a->Daddiu(rd, ZERO, value); + } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) { + // 64-bit value can be loaded as a signed 32-bit number which has all + // of its 16 least significant bits set to zero. + a->RecordLoadConst64Path(kLoadConst64PathLui); + a->Lui(rd, value >> 16); + } else if (IsInt<32>(value)) { + // Loads with 2 instructions. + // 64-bit value can be loaded as a signed 32-bit number which has some + // or all of its 16 least significant bits set to one. + a->RecordLoadConst64Path(kLoadConst64PathLuiOri); + a->Lui(rd, value >> 16); + a->Ori(rd, rd, value); + } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) { + // 64-bit value which consists of an unsigned 16-bit value in its + // least significant 32-bits, and a signed 16-bit value in its + // most significant 32-bits. + a->RecordLoadConst64Path(kLoadConst64PathOriDahi); + a->Ori(rd, ZERO, value); + a->Dahi(rd, value >> 32); + } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) { + // 64-bit value which consists of an unsigned 16-bit value in its + // least significant 48-bits, and a signed 16-bit value in its + // most significant 16-bits. + a->RecordLoadConst64Path(kLoadConst64PathOriDati); + a->Ori(rd, ZERO, value); + a->Dati(rd, value >> 48); + } else if ((value & 0xFFFF) == 0 && + (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) { + // 16 LSBs (Least Significant Bits) all set to zero. + // 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
+ a->RecordLoadConst64Path(kLoadConst64PathLuiDahi); + a->Lui(rd, value >> 16); + a->Dahi(rd, (value >> 32) + bit31); + } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) { + // 16 LSBs all set to zero. + // 48 MSBs hold a signed value which can't be represented by signed + // 32-bit number, and the middle 16 bits are all zero, or all one. + a->RecordLoadConst64Path(kLoadConst64PathLuiDati); + a->Lui(rd, value >> 16); + a->Dati(rd, (value >> 48) + bit31); + } else if (IsInt<16>(static_cast<int32_t>(value)) && + (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) { + // 32 LSBs contain an unsigned 16-bit number. + // 32 MSBs contain a signed 16-bit number. + a->RecordLoadConst64Path(kLoadConst64PathDaddiuDahi); + a->Daddiu(rd, ZERO, value); + a->Dahi(rd, (value >> 32) + bit31); + } else if (IsInt<16>(static_cast<int32_t>(value)) && + ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) { + // 48 LSBs contain an unsigned 16-bit number. + // 16 MSBs contain a signed 16-bit number. + a->RecordLoadConst64Path(kLoadConst64PathDaddiuDati); + a->Daddiu(rd, ZERO, value); + a->Dati(rd, (value >> 48) + bit31); + } else if (IsPowerOfTwo(value + UINT64_C(1))) { + // 64-bit values which have their "n" MSBs set to one, and their + // "64-n" LSBs set to zero. "n" must meet the restrictions 0 < n < 64. + int shift_cnt = 64 - CTZ(value + UINT64_C(1)); + a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsrlX); + a->Daddiu(rd, ZERO, -1); + if (shift_cnt < 32) { + a->Dsrl(rd, rd, shift_cnt); + } else { + a->Dsrl32(rd, rd, shift_cnt & 31); + } + } else { + int shift_cnt = CTZ(value); + int64_t tmp = value >> shift_cnt; + a->RecordLoadConst64Path(kLoadConst64PathOriDsllX); + if (IsUint<16>(tmp)) { + // Value can be computed by loading a 16-bit unsigned value, and + // then shifting left. + a->Ori(rd, ZERO, tmp); + if (shift_cnt < 32) { + a->Dsll(rd, rd, shift_cnt); + } else { + a->Dsll32(rd, rd, shift_cnt & 31); + } + } else if (IsInt<16>(tmp)) { + // Value can be computed by loading a 16-bit signed value, and + // then shifting left. + a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllX); + a->Daddiu(rd, ZERO, tmp); + if (shift_cnt < 32) { + a->Dsll(rd, rd, shift_cnt); + } else { + a->Dsll32(rd, rd, shift_cnt & 31); + } + } else if (rep32_count < 3) { + // Value being loaded has 32 LSBs equal to the 32 MSBs, and the + // value loaded into the 32 LSBs can be loaded with a single + // MIPS instruction. + a->LoadConst32(rd, value); + a->Dinsu(rd, rd, 32, 32); + a->RecordLoadConst64Path(kLoadConst64PathDinsu1); + } else if (IsInt<32>(tmp)) { + // Loads with 3 instructions. + // Value can be computed by loading a 32-bit signed value, and + // then shifting left. + a->RecordLoadConst64Path(kLoadConst64PathLuiOriDsllX); + a->Lui(rd, tmp >> 16); + a->Ori(rd, rd, tmp); + if (shift_cnt < 32) { + a->Dsll(rd, rd, shift_cnt); + } else { + a->Dsll32(rd, rd, shift_cnt & 31); + } + } else { + shift_cnt = 16 + CTZ(value >> 16); + tmp = value >> shift_cnt; + if (IsUint<16>(tmp)) { + // Value can be computed by loading a 16-bit unsigned value, + // shifting left, and "or"ing in another 16-bit unsigned value. 
+ a->RecordLoadConst64Path(kLoadConst64PathOriDsllXOri); + a->Ori(rd, ZERO, tmp); + if (shift_cnt < 32) { + a->Dsll(rd, rd, shift_cnt); + } else { + a->Dsll32(rd, rd, shift_cnt & 31); + } + a->Ori(rd, rd, value); + } else if (IsInt<16>(tmp)) { + // Value can be computed by loading a 16-bit signed value, + // shifting left, and "or"ing in a 16-bit unsigned value. + a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllXOri); + a->Daddiu(rd, ZERO, tmp); + if (shift_cnt < 32) { + a->Dsll(rd, rd, shift_cnt); + } else { + a->Dsll32(rd, rd, shift_cnt & 31); + } + a->Ori(rd, rd, value); + } else if (rep32_count < 4) { + // Value being loaded has 32 LSBs equal to the 32 MSBs, and the + // value in the 32 LSBs requires 2 MIPS instructions to load. + a->LoadConst32(rd, value); + a->Dinsu(rd, rd, 32, 32); + a->RecordLoadConst64Path(kLoadConst64PathDinsu2); + } else { + // Loads with 3-4 instructions. + // Catch-all case to get any other 64-bit values which aren't + // handled by special cases above. + uint64_t tmp2 = value; + a->RecordLoadConst64Path(kLoadConst64PathCatchAll); + a->LoadConst32(rd, value); + if (bit31) { + tmp2 += UINT64_C(0x100000000); + } + if (((tmp2 >> 32) & 0xFFFF) != 0) { + a->Dahi(rd, tmp2 >> 32); + } + if (tmp2 & UINT64_C(0x800000000000)) { + tmp2 += UINT64_C(0x1000000000000); + } + if ((tmp2 >> 48) != 0) { + a->Dati(rd, tmp2 >> 48); + } + } + } + } +} + +static constexpr size_t kMips64HalfwordSize = 2; static constexpr size_t kMips64WordSize = 4; static constexpr size_t kMips64DoublewordSize = 8; @@ -79,6 +316,79 @@ class Mips64Label : public Label { DISALLOW_COPY_AND_ASSIGN(Mips64Label); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + Mips64Label* GetLabel() { + return &label_; + } + + const Mips64Label* GetLabel() const { + return &label_; + } + + private: + Mips64Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + +// Jump table: table of labels emitted after the code and before the literals. Similar to literals. +class JumpTable { + public: + explicit JumpTable(std::vector<Mips64Label*>&& labels) + : label_(), labels_(std::move(labels)) { + } + + size_t GetSize() const { + return labels_.size() * sizeof(uint32_t); + } + + const std::vector<Mips64Label*>& GetData() const { + return labels_; + } + + Mips64Label* GetLabel() { + return &label_; + } + + const Mips64Label* GetLabel() const { + return &label_; + } + + private: + Mips64Label label_; + std::vector<Mips64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. 
class Mips64ExceptionSlowPath { public: @@ -100,15 +410,22 @@ class Mips64ExceptionSlowPath { DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); }; -class Mips64Assembler FINAL : public Assembler { +class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> { public: - explicit Mips64Assembler(ArenaAllocator* arena) + using JNIBase = JNIMacroAssembler<PointerSize::k64>; + + explicit Mips64Assembler(ArenaAllocator* arena, + const Mips64InstructionSetFeatures* instruction_set_features = nullptr) : Assembler(arena), overwriting_(false), overwrite_location_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), + long_literals_(arena->Adapter(kArenaAllocAssembler)), + jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), - last_branch_id_(0) { + last_branch_id_(0), + has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) { cfi().DelayEmittingAdvancePCs(); } @@ -118,6 +435,9 @@ class Mips64Assembler FINAL : public Assembler { } } + size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } + DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + // Emit Machine Instructions. void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); @@ -148,18 +468,20 @@ class Mips64Assembler FINAL : public Assembler { void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Bitswap(GpuRegister rd, GpuRegister rt); - void Dbitswap(GpuRegister rd, GpuRegister rt); + void Dbitswap(GpuRegister rd, GpuRegister rt); // MIPS64 void Seb(GpuRegister rd, GpuRegister rt); void Seh(GpuRegister rd, GpuRegister rt); - void Dsbh(GpuRegister rd, GpuRegister rt); - void Dshd(GpuRegister rd, GpuRegister rt); + void Dsbh(GpuRegister rd, GpuRegister rt); // MIPS64 + void Dshd(GpuRegister rd, GpuRegister rt); // MIPS64 void Dext(GpuRegister rs, GpuRegister rt, int pos, int size); // MIPS64 void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 + void Lsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); + void Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); // MIPS64 void Wsbh(GpuRegister rd, GpuRegister rt); void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); - void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); + void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 void Ll(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); - void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); + void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 void Sll(GpuRegister rd, GpuRegister rt, int shamt); void Srl(GpuRegister rd, GpuRegister rt, int shamt); @@ -171,7 +493,7 @@ class Mips64Assembler FINAL : public Assembler { void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs); void Dsll(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 - void Drotr(GpuRegister rd, GpuRegister rt, int shamt); + void Drotr(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsra(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsll32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 @@ -189,7 +511,12 @@ class Mips64Assembler FINAL : public Assembler { void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lwu(GpuRegister 
rt, GpuRegister rs, uint16_t imm16); // MIPS64 + void Lwpc(GpuRegister rs, uint32_t imm19); + void Lwupc(GpuRegister rs, uint32_t imm19); // MIPS64 + void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); + void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 void Sync(uint32_t stype); @@ -207,8 +534,8 @@ class Mips64Assembler FINAL : public Assembler { void Selnez(GpuRegister rd, GpuRegister rs, GpuRegister rt); void Clz(GpuRegister rd, GpuRegister rs); void Clo(GpuRegister rd, GpuRegister rs); - void Dclz(GpuRegister rd, GpuRegister rs); - void Dclo(GpuRegister rd, GpuRegister rs); + void Dclz(GpuRegister rd, GpuRegister rs); // MIPS64 + void Dclo(GpuRegister rd, GpuRegister rs); // MIPS64 void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); @@ -216,6 +543,7 @@ class Mips64Assembler FINAL : public Assembler { void Auipc(GpuRegister rs, uint16_t imm16); void Addiupc(GpuRegister rs, uint32_t imm19); void Bc(uint32_t imm26); + void Balc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -320,12 +648,154 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); + // MSA instructions. + void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uB(VectorRegister wd, 
VectorRegister ws, VectorRegister wt); + void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void Ffint_sW(VectorRegister wd, VectorRegister ws); + void Ffint_sD(VectorRegister wd, VectorRegister ws); + void Ftint_sW(VectorRegister wd, VectorRegister ws); + void Ftint_sD(VectorRegister wd, VectorRegister ws); + + void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). 
+ void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); + void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); + void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); + void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); + void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); + void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); + + void MoveV(VectorRegister wd, VectorRegister ws); + void SplatiB(VectorRegister wd, VectorRegister ws, int n4); + void SplatiH(VectorRegister wd, VectorRegister ws, int n3); + void SplatiW(VectorRegister wd, VectorRegister ws, int n2); + void SplatiD(VectorRegister wd, VectorRegister ws, int n1); + void FillB(VectorRegister wd, GpuRegister rs); + void FillH(VectorRegister wd, GpuRegister rs); + void FillW(VectorRegister wd, GpuRegister rs); + void FillD(VectorRegister wd, GpuRegister rs); + + void LdiB(VectorRegister wd, int imm8); + void LdiH(VectorRegister wd, int imm10); + void LdiW(VectorRegister wd, int imm10); + void LdiD(VectorRegister wd, int imm10); + void LdB(VectorRegister wd, GpuRegister rs, int offset); + void LdH(VectorRegister wd, GpuRegister rs, int offset); + void LdW(VectorRegister wd, GpuRegister rs, int offset); + void LdD(VectorRegister wd, GpuRegister rs, int offset); + void StB(VectorRegister wd, GpuRegister rs, int offset); + void StH(VectorRegister wd, GpuRegister rs, int offset); + void StW(VectorRegister wd, GpuRegister rs, int offset); + void StD(VectorRegister wd, GpuRegister rs, int offset); + // Higher level composite instructions. + int InstrCountForLoadReplicatedConst32(int64_t); void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 + // This function is only used for testing purposes. + void RecordLoadConst64Path(int value); + + void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(GpuRegister dst, GpuRegister src) { + // dst = -src. + // Negate the 32-bit ref. + Dsubu(dst, ZERO, src); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. + Dext(dst, dst, 0, 32); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(GpuRegister reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(GpuRegister reg) { + // reg = -reg. + // Negate the 32-bit ref. + Dsubu(reg, ZERO, reg); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. + Dext(reg, reg, 0, 32); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<Mips64Label*>(label)); } @@ -334,8 +804,61 @@ class Mips64Assembler FINAL : public Assembler { } void Bind(Mips64Label* label); + + // Don't warn about a different virtual Bind/Jump in the base class. + using JNIBase::Bind; + using JNIBase::Jump; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS64"; + UNREACHABLE(); + } + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS64"; + UNREACHABLE(); + } + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED, + JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED, + ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS64"; + UNREACHABLE(); + } + + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Not implemented on MIPS64"; + UNREACHABLE(); + } + + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Load label address using PC-relative loads. To be used with data labels in the literal / + // jump table area only and not with regular code labels. + void LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label); + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using PC-relative loads. + void LoadLiteral(GpuRegister dest_reg, LoadOperandType load_type, Literal* literal); + + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels); + void Bc(Mips64Label* label); - void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Balc(Mips64Label* label); void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); void Bltzc(GpuRegister rt, Mips64Label* label); void Bgtzc(GpuRegister rt, Mips64Label* label); @@ -352,6 +875,240 @@ class Mips64Assembler FINAL : public Assembler { void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); + void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword); + + private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. 
+ struct NoImplicitNullChecker { + void operator()() const {} + }; + + public: + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreConstToOffset(StoreOperandType type, + int64_t value, + GpuRegister base, + int32_t offset, + GpuRegister temp, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // We permit `base` and `temp` to coincide (however, we check that neither is AT), + // in which case the `base` register may be overwritten in the process. + CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + GpuRegister reg; + // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp` + // to load and hold the value but we can use AT instead as AT hasn't been used yet. + // Otherwise, `temp` can be used for the value. And if `temp` is the same as the + // original `base` (that is, `base` prior to the adjustment), the original `base` + // register will be overwritten. + if (base == temp) { + temp = AT; + } + + if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) { + if (value == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst64(reg, value); + } + Sd(reg, base, offset); + null_checker(); + } else { + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + reg = ZERO; + } else { + reg = temp; + LoadConst32(reg, low); + } + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + Sw(reg, base, offset); + break; + case kStoreDoubleword: + // not aligned to kMips64DoublewordSize + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + if (high == 0) { + reg = ZERO; + } else { + reg = temp; + if (high != low) { + LoadConst32(reg, high); + } + } + Sw(reg, base, offset + kMips64WordSize); + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lw(reg, base, offset); + break; + case kLoadUnsignedWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + null_checker(); + Lwu(TMP2, base, offset + kMips64WordSize); + Dinsu(reg, TMP2, 32, 32); + } else { + Ld(reg, base, offset); + null_checker(); + } + break; + } + if (type != kLoadDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); 
+ + switch (type) { + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + Lw(TMP2, base, offset + kMips64WordSize); + Mthc1(TMP2, reg); + } else { + Ldc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + // Must not use AT as `reg`, so as not to overwrite the value being stored + // with the adjusted `base`. + CHECK_NE(reg, AT); + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + Dsrl32(TMP2, reg, 0); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sd(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + + switch (type) { + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Swc1(reg, base, offset); + null_checker(); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Mfhc1(TMP2, reg); + Swc1(reg, base, offset); + null_checker(); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sdc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset); void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -365,13 +1122,13 @@ class Mips64Assembler FINAL : public Assembler { // // Emit code that will create an activation on the stack. - void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; // Emit code that will remove an activation from the stack. 
- void RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; @@ -383,10 +1140,11 @@ class Mips64Assembler FINAL : public Assembler { void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreStackOffsetToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; + void StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; @@ -394,9 +1152,7 @@ class Mips64Assembler FINAL : public Assembler { // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, - ThreadOffset<kMips64DoublewordSize> src, - size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -405,18 +1161,19 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<kMips64DoublewordSize> offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE; // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMips64DoublewordSize> thr_offs, - ManagedRegister mscratch) OVERRIDE; - - void CopyRawPtrToThread64(ThreadOffset<kMips64DoublewordSize> thr_offs, FrameOffset fr_offs, + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, ManagedRegister mscratch) OVERRIDE; + void CopyRawPtrToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; @@ -471,8 +1228,7 @@ class Mips64Assembler FINAL : public Assembler { // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<kMips64DoublewordSize> offset, - ManagedRegister mscratch) OVERRIDE; + void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. @@ -486,12 +1242,15 @@ class Mips64Assembler FINAL : public Assembler { // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, // must be used instead of Mips64Label::GetPosition()). - uint32_t GetLabelLocation(Mips64Label* label) const; + uint32_t GetLabelLocation(const Mips64Label* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). 
uint32_t GetAdjustedPosition(uint32_t old_position); + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accommodate larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -521,10 +1280,22 @@ class Mips64Assembler FINAL : public Assembler { kUncondBranch, kCondBranch, kCall, + // Near label. + kLabel, + // Near literals. + kLiteral, + kLiteralUnsigned, + kLiteralLong, // Long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // Far label. + kFarLabel, + // Far literals. + kFarLiteral, + kFarLiteralUnsigned, + kFarLiteralLong, }; // Bit sizes of offsets defined as enums to minimize chance of typos. @@ -560,16 +1331,16 @@ class Mips64Assembler FINAL : public Assembler { }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(uint32_t location, uint32_t target, bool is_call); // Conditional branch. Branch(uint32_t location, uint32_t target, BranchCondition condition, GpuRegister lhs_reg, - GpuRegister rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + GpuRegister rhs_reg); + // Label address (in literal area) or literal. + Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -653,7 +1424,7 @@ class Mips64Assembler FINAL : public Assembler { private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call); + void InitializeType(Type initial_type); // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -662,7 +1433,7 @@ uint32_t target_; // Offset into assembler buffer in bytes. GpuRegister lhs_reg_; // Left-hand side register in conditional branches or - // indirect call register. + // destination register in literals. GpuRegister rhs_reg_; // Right-hand side register in conditional branches. BranchCondition condition_; // Condition for conditional branches.
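The near/far literal and label types added above correspond to two emission forms that the tests further down in this diff exercise: a near literal (kLiteral) is a single lwpc whose immediate is a 19-bit word offset, while a far literal (kFarLiteral) is rewritten as an auipc + lw pair through AT once the offset outgrows that range. The C++ fragment below is only a rough sketch of that decision, written against the assembler's Lwpc/Auipc/Lw emitters; the helper names and the range constant are assumptions for illustration, not the actual Branch/PromoteBranches() code.

// Illustrative sketch only (hypothetical helpers, not part of Mips64Assembler).
bool FitsInLwpcRange(int32_t byte_offset) {
  int32_t words = byte_offset >> 2;                 // lwpc counts words, not bytes.
  return words >= -(1 << 18) && words < (1 << 18);  // signed 19-bit reach.
}

void EmitLiteralLoadSketch(mips64::Mips64Assembler* assembler,
                           mips64::GpuRegister dest,
                           int32_t pc_rel_offset) {
  if (FitsInLwpcRange(pc_rel_offset)) {
    // Near form (kLiteral): one instruction; the immediate field is the word
    // offset truncated to 19 bits (see the Lwpc test below).
    assembler->Lwpc(dest, static_cast<uint32_t>(pc_rel_offset >> 2) & 0x7FFFF);
  } else {
    // Far form (kFarLiteral): auipc + lw via AT, splitting the offset into
    // %hi/%lo halves and compensating for the sign extension of the low half.
    int32_t lo = static_cast<int16_t>(pc_rel_offset & 0xFFFF);
    int32_t hi = ((pc_rel_offset - lo) >> 16) & 0xFFFF;
    assembler->Auipc(mips64::AT, hi);
    assembler->Lw(dest, mips64::AT, lo);
  }
}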
@@ -681,18 +1452,33 @@ class Mips64Assembler FINAL : public Assembler { void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df); + void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode); + void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void Buncond(Mips64Label* label); void Bcond(Mips64Label* label, BranchCondition condition, GpuRegister lhs, GpuRegister rhs = ZERO); - void Call(Mips64Label* label, GpuRegister indirect_reg); + void Call(Mips64Label* label); void FinalizeLabeledBranch(Mips64Label* label); Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + void EmitLiterals(); + void ReserveJumpTableSpace(); + void EmitJumpTables(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -701,6 +1487,10 @@ class Mips64Assembler FINAL : public Assembler { // Emits exception block. void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + bool HasMsa() const { + return has_msa_; + } + // List of exception blocks to generate at the end of the code cache. std::vector<Mips64ExceptionSlowPath> exception_blocks_; @@ -711,11 +1501,21 @@ class Mips64Assembler FINAL : public Assembler { // The current overwrite location. uint32_t overwrite_location_; + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons. + + // Jump table list. + ArenaDeque<JumpTable> jump_tables_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; uint32_t last_branch_id_; + const bool has_msa_; + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index b758d64c1e..f2e3b1610c 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -37,12 +37,17 @@ struct MIPS64CpuRegisterCompare { class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> { + uint32_t, + mips64::VectorRegister> { public: typedef AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> Base; + uint32_t, + mips64::VectorRegister> Base; + + AssemblerMIPS64Test() + : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {} protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... 
@@ -60,7 +65,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative // branches in the .text section and so they require a relocation pass (there's a relocation // section, .rela.text, that has the needed info to fix up the branches). - return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; } void Pad(std::vector<uint8_t>& data) OVERRIDE { @@ -76,6 +81,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return " -D -bbinary -mmips:isa64r6"; } + mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE { + return new (arena) mips64::Mips64Assembler(arena, instruction_set_features_.get()); + } + void SetUpHelpers() OVERRIDE { if (registers_.size() == 0) { registers_.push_back(new mips64::GpuRegister(mips64::ZERO)); @@ -176,6 +185,39 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, fp_registers_.push_back(new mips64::FpuRegister(mips64::F29)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F30)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F31)); + + vec_registers_.push_back(new mips64::VectorRegister(mips64::W0)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W1)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W2)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W3)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W4)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W5)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W6)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W7)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W8)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W9)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W10)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W11)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W12)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W13)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W14)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W15)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W16)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W17)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W18)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W19)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W20)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W21)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W22)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W23)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W24)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W25)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W26)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W27)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W28)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W29)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W30)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W31)); } } @@ -183,6 +225,7 @@ class AssemblerMIPS64Test : public 
AssemblerTest<mips64::Mips64Assembler, AssemblerTest::TearDown(); STLDeleteElements(&registers_); STLDeleteElements(&fp_registers_); + STLDeleteElements(&vec_registers_); } std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE { @@ -193,6 +236,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return fp_registers_; } + std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE { + return vec_registers_; + } + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { return imm_value; } @@ -212,7 +259,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, mips64::Mips64Label*), - std::string instr_name) { + const std::string& instr_name) { mips64::Mips64Label label; (Base::GetAssembler()->*f)(mips64::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -241,7 +288,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, mips64::GpuRegister, mips64::Mips64Label*), - std::string instr_name) { + const std::string& instr_name) { mips64::Mips64Label label; (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -272,8 +319,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; std::vector<mips64::FpuRegister*> fp_registers_; -}; + std::vector<mips64::VectorRegister*> vec_registers_; + std::unique_ptr<const Mips64InstructionSetFeatures> instruction_set_features_; +}; TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } @@ -283,6 +332,38 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { // FP Operations // /////////////////// +TEST_F(AssemblerMIPS64Test, AddS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::AddS, "add.s ${reg1}, ${reg2}, ${reg3}"), "add.s"); +} + +TEST_F(AssemblerMIPS64Test, AddD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::AddD, "add.d ${reg1}, ${reg2}, ${reg3}"), "add.d"); +} + +TEST_F(AssemblerMIPS64Test, SubS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SubS, "sub.s ${reg1}, ${reg2}, ${reg3}"), "sub.s"); +} + +TEST_F(AssemblerMIPS64Test, SubD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SubD, "sub.d ${reg1}, ${reg2}, ${reg3}"), "sub.d"); +} + +TEST_F(AssemblerMIPS64Test, MulS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MulS, "mul.s ${reg1}, ${reg2}, ${reg3}"), "mul.s"); +} + +TEST_F(AssemblerMIPS64Test, MulD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MulD, "mul.d ${reg1}, ${reg2}, ${reg3}"), "mul.d"); +} + +TEST_F(AssemblerMIPS64Test, DivS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::DivS, "div.s ${reg1}, ${reg2}, ${reg3}"), "div.s"); +} + +TEST_F(AssemblerMIPS64Test, DivD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::DivD, "div.d ${reg1}, ${reg2}, ${reg3}"), "div.d"); +} + TEST_F(AssemblerMIPS64Test, SqrtS) { DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtS, "sqrt.s ${reg1}, ${reg2}"), "sqrt.s"); } @@ -567,6 +648,26 @@ TEST_F(AssemblerMIPS64Test, Dmtc1) { DriverStr(RepeatRF(&mips64::Mips64Assembler::Dmtc1, "dmtc1 ${reg1}, ${reg2}"), "Dmtc1"); } +TEST_F(AssemblerMIPS64Test, Lwc1) { + DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Lwc1, -16, "lwc1 ${reg1}, {imm}(${reg2})"), + "lwc1"); +} + +TEST_F(AssemblerMIPS64Test, Ldc1) { + DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Ldc1, -16, "ldc1 ${reg1}, {imm}(${reg2})"), +
"ldc1"); +} + +TEST_F(AssemblerMIPS64Test, Swc1) { + DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Swc1, -16, "swc1 ${reg1}, {imm}(${reg2})"), + "swc1"); +} + +TEST_F(AssemblerMIPS64Test, Sdc1) { + DriverStr(RepeatFRIb(&mips64::Mips64Assembler::Sdc1, -16, "sdc1 ${reg1}, {imm}(${reg2})"), + "sdc1"); +} + //////////////// // CALL / JMP // //////////////// @@ -576,83 +677,83 @@ TEST_F(AssemblerMIPS64Test, Jalr) { RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); } -TEST_F(AssemblerMIPS64Test, Jialc) { +TEST_F(AssemblerMIPS64Test, Balc) { mips64::Mips64Label label1, label2; - __ Jialc(&label1, mips64::T9); + __ Balc(&label1); constexpr size_t kAdduCount1 = 63; for (size_t i = 0; i != kAdduCount1; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } __ Bind(&label1); - __ Jialc(&label2, mips64::T9); + __ Balc(&label2); constexpr size_t kAdduCount2 = 64; for (size_t i = 0; i != kAdduCount2; ++i) { __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); } __ Bind(&label2); - __ Jialc(&label1, mips64::T9); + __ Balc(&label1); std::string expected = ".set noreorder\n" - "lapc $t9, 1f\n" - "jialc $t9, 0\n" + + "balc 1f\n" + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + "1:\n" - "lapc $t9, 2f\n" - "jialc $t9, 0\n" + + "balc 2f\n" + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + "2:\n" - "lapc $t9, 1b\n" - "jialc $t9, 0\n"; - DriverStr(expected, "Jialc"); + "balc 1b\n"; + DriverStr(expected, "Balc"); } -TEST_F(AssemblerMIPS64Test, LongJialc) { +TEST_F(AssemblerMIPS64Test, LongBalc) { + constexpr uint32_t kNopCount1 = (1u << 25) + 1; + constexpr uint32_t kNopCount2 = (1u << 25) + 1; + constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u; + ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity); + __ GetBuffer()->ExtendCapacity(kRequiredCapacity); mips64::Mips64Label label1, label2; - __ Jialc(&label1, mips64::T9); - constexpr uint32_t kAdduCount1 = (1u << 18) + 1; - for (uint32_t i = 0; i != kAdduCount1; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + __ Balc(&label1); + for (uint32_t i = 0; i != kNopCount1; ++i) { + __ Nop(); } __ Bind(&label1); - __ Jialc(&label2, mips64::T9); - constexpr uint32_t kAdduCount2 = (1u << 18) + 1; - for (uint32_t i = 0; i != kAdduCount2; ++i) { - __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + __ Balc(&label2); + for (uint32_t i = 0; i != kNopCount2; ++i) { + __ Nop(); } __ Bind(&label2); - __ Jialc(&label1, mips64::T9); + __ Balc(&label1); - uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc. offset_forward1 <<= 2; - offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc. - uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc. offset_forward2 <<= 2; - offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc. - uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc. offset_back <<= 2; - offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. 
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc. + // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs + // instead of generating them ourselves in the source code. This saves a few minutes + // of test time. std::ostringstream oss; oss << ".set noreorder\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" - "jialc $t9, 0\n" << - RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n" "1:\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" - "jialc $t9, 0\n" << - RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n" "2:\n" - "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" - "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" - "jialc $t9, 0\n"; + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jialc $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"; std::string expected = oss.str(); - DriverStr(expected, "LongJialc"); + DriverStr(expected, "LongBalc"); } TEST_F(AssemblerMIPS64Test, Bc) { @@ -827,6 +928,468 @@ TEST_F(AssemblerMIPS64Test, LongBeqc) { // MISC // ////////// +TEST_F(AssemblerMIPS64Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. + // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lwpc, 19, code), "Lwpc"); +} + +TEST_F(AssemblerMIPS64Test, Lwupc) { + // The comment for the Lwpc test applies here as well. + const char* code = ".set imm, {imm}\nlwu ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lwupc, 19, code), "Lwupc"); +} + +TEST_F(AssemblerMIPS64Test, Ldpc) { + // The comment for the Lwpc test applies here as well. + const char* code = ".set imm, {imm}\nld ${reg}, ((imm - ((imm & 0x20000) << 1)) << 3)($pc)"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Ldpc, 18, code), "Ldpc"); +} + +TEST_F(AssemblerMIPS64Test, Auipc) { + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerMIPS64Test, Addiupc) { + // The comment from the Lwpc() test applies to this Addiupc() test as well. 
+ const char* code = ".set imm, {imm}\naddiupc ${reg}, (imm - ((imm & 0x40000) << 1)) << 2"; + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Addiupc, 19, code), "Addiupc"); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) { + mips64::Mips64Label label; + __ LoadLabelAddress(mips64::V0, &label); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "daddiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); + EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "lwupc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) { + mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + 
"auipc $at, %hi(2f - 1b)\n" + "lwu $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteralUnsigned"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDD; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "ldpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadFarthestNearLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + constexpr uint32_t kAdduCount = 0x3FFDE; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "ld $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "LoadNearestFarLiteralLong"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + // A nop will be inserted here before the 64-bit literals. + + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. 
+ // "ldpc $a2, 2f\n" + ".word 0xECD80004\n" + "ldpc $a3, 3f\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "nop\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n" + "3:\n" + ".dword 0xAAAAAAAAAAAAAAAA\n"; + DriverStr(expected, "LongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u); +} + +TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) { + mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555)); + __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1); + __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2); + __ LoadLabelAddress(mips64::V0, literal1->GetLabel()); + __ LoadLabelAddress(mips64::V1, literal2->GetLabel()); + + std::string expected = + "ldpc $a1, 1f\n" + // The GNU assembler incorrectly requires the ldpc instruction to be located + // at an address that's a multiple of 8. TODO: Remove this workaround if/when + // the assembler is fixed. + // "ldpc $a2, 2f\n" + ".word 0xECD80003\n" + "lapc $v0, 1f\n" + "lapc $v1, 2f\n" + "1:\n" + ".dword 0x0123456789ABCDEF\n" + "2:\n" + ".dword 0x5555555555555555\n"; + DriverStr(expected, "LongLiteralAlignmentNoNop"); + EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u); + EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u); +} + +TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) { + mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF)); + __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal); + __ LoadLabelAddress(mips64::V1, literal->GetLabel()); + constexpr uint32_t kAdduCount = 0x3FFDF; + for (uint32_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + // A nop will be inserted here before the 64-bit literal. 
+ + std::string expected = + "1:\n" + "auipc $at, %hi(3f - 1b)\n" + "ld $v0, %lo(3f - 1b)($at)\n" + "2:\n" + "auipc $at, %hi(3f - 2b)\n" + "daddiu $v1, $at, %lo(3f - 2b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "nop\n" + "3:\n" + ".dword 0x0123456789ABCDEF\n"; + DriverStr(expected, "FarLongLiteralAlignmentNop"); + EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4); +} + +TEST_F(AssemblerMIPS64Test, Addu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "addu"); +} + +TEST_F(AssemblerMIPS64Test, Addiu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Addiu, -16, "addiu ${reg1}, ${reg2}, {imm}"), + "addiu"); +} + +TEST_F(AssemblerMIPS64Test, Daddu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Daddu, "daddu ${reg1}, ${reg2}, ${reg3}"), "daddu"); +} + +TEST_F(AssemblerMIPS64Test, Daddiu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Daddiu, -16, "daddiu ${reg1}, ${reg2}, {imm}"), + "daddiu"); +} + +TEST_F(AssemblerMIPS64Test, Subu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Subu, "subu ${reg1}, ${reg2}, ${reg3}"), "subu"); +} + +TEST_F(AssemblerMIPS64Test, Dsubu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsubu, "dsubu ${reg1}, ${reg2}, ${reg3}"), "dsubu"); +} + +TEST_F(AssemblerMIPS64Test, MulR6) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::MulR6, "mul ${reg1}, ${reg2}, ${reg3}"), "mulR6"); +} + +TEST_F(AssemblerMIPS64Test, DivR6) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::DivR6, "div ${reg1}, ${reg2}, ${reg3}"), "divR6"); +} + +TEST_F(AssemblerMIPS64Test, ModR6) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::ModR6, "mod ${reg1}, ${reg2}, ${reg3}"), "modR6"); +} + +TEST_F(AssemblerMIPS64Test, DivuR6) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::DivuR6, "divu ${reg1}, ${reg2}, ${reg3}"), + "divuR6"); +} + +TEST_F(AssemblerMIPS64Test, ModuR6) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::ModuR6, "modu ${reg1}, ${reg2}, ${reg3}"), + "moduR6"); +} + +TEST_F(AssemblerMIPS64Test, Dmul) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmul, "dmul ${reg1}, ${reg2}, ${reg3}"), "dmul"); +} + +TEST_F(AssemblerMIPS64Test, Ddiv) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Ddiv, "ddiv ${reg1}, ${reg2}, ${reg3}"), "ddiv"); +} + +TEST_F(AssemblerMIPS64Test, Dmod) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmod, "dmod ${reg1}, ${reg2}, ${reg3}"), "dmod"); +} + +TEST_F(AssemblerMIPS64Test, Ddivu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Ddivu, "ddivu ${reg1}, ${reg2}, ${reg3}"), "ddivu"); +} + +TEST_F(AssemblerMIPS64Test, Dmodu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dmodu, "dmodu ${reg1}, ${reg2}, ${reg3}"), "dmodu"); +} + +TEST_F(AssemblerMIPS64Test, And) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::And, "and ${reg1}, ${reg2}, ${reg3}"), "and"); +} + +TEST_F(AssemblerMIPS64Test, Andi) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Andi, 16, "andi ${reg1}, ${reg2}, {imm}"), "andi"); +} + +TEST_F(AssemblerMIPS64Test, Or) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Or, "or ${reg1}, ${reg2}, ${reg3}"), "or"); +} + +TEST_F(AssemblerMIPS64Test, Ori) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ori, 16, "ori ${reg1}, ${reg2}, {imm}"), "ori"); +} + +TEST_F(AssemblerMIPS64Test, Xor) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Xor, "xor ${reg1}, ${reg2}, ${reg3}"), "xor"); +} + +TEST_F(AssemblerMIPS64Test, Xori) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Xori, 16, "xori ${reg1}, ${reg2}, {imm}"), "xori"); +} + 
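The Repeat* drivers used by most tests in this file expand in essentially the same way as the hand-rolled Daui test below: they loop over every register (and, where applicable, immediate) combination, emit the instruction through a pointer-to-member, and accumulate the matching expected-assembly text for DriverStr() to check against the GNU toolchain output. A rough, hypothetical sketch of such a three-register driver follows; the real helper lives in the shared AssemblerTest base class and additionally handles ${reg}/{imm} substitution and combination-count warnings.

// Hypothetical illustration only, not the actual RepeatRRR implementation.
std::string RepeatRRRSketch(mips64::Mips64Assembler* assembler,
                            void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                               mips64::GpuRegister,
                                                               mips64::GpuRegister),
                            const std::vector<mips64::GpuRegister*>& regs,
                            const char* mnemonic) {
  std::ostringstream expected;
  for (mips64::GpuRegister* r1 : regs) {
    for (mips64::GpuRegister* r2 : regs) {
      for (mips64::GpuRegister* r3 : regs) {
        (assembler->*f)(*r1, *r2, *r3);  // Emit the instruction under test.
        expected << mnemonic << " $" << *r1 << ", $" << *r2 << ", $" << *r3 << "\n";
      }
    }
  }
  return expected.str();  // DriverStr() later assembles this text for comparison.
}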
+TEST_F(AssemblerMIPS64Test, Nor) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Nor, "nor ${reg1}, ${reg2}, ${reg3}"), "nor"); +} + +TEST_F(AssemblerMIPS64Test, Lb) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "lb"); +} + +TEST_F(AssemblerMIPS64Test, Lh) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lh, -16, "lh ${reg1}, {imm}(${reg2})"), "lh"); +} + +TEST_F(AssemblerMIPS64Test, Lw) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lw, -16, "lw ${reg1}, {imm}(${reg2})"), "lw"); +} + +TEST_F(AssemblerMIPS64Test, Ld) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ld, -16, "ld ${reg1}, {imm}(${reg2})"), "ld"); +} + +TEST_F(AssemblerMIPS64Test, Lbu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lbu, -16, "lbu ${reg1}, {imm}(${reg2})"), "lbu"); +} + +TEST_F(AssemblerMIPS64Test, Lhu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lhu, -16, "lhu ${reg1}, {imm}(${reg2})"), "lhu"); +} + +TEST_F(AssemblerMIPS64Test, Lwu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lwu, -16, "lwu ${reg1}, {imm}(${reg2})"), "lwu"); +} + +TEST_F(AssemblerMIPS64Test, Lui) { + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui"); +} + +TEST_F(AssemblerMIPS64Test, Daui) { + std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters(); + std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters(); + reg2_registers.erase(reg2_registers.begin()); // reg2 can't be ZERO, remove it. + std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + std::ostringstream expected; + for (mips64::GpuRegister* reg1 : reg1_registers) { + for (mips64::GpuRegister* reg2 : reg2_registers) { + for (int64_t imm : imms) { + __ Daui(*reg1, *reg2, imm); + expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n"; + } + } + } + DriverStr(expected.str(), "daui"); +} + +TEST_F(AssemblerMIPS64Test, Dahi) { + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi"); +} + +TEST_F(AssemblerMIPS64Test, Dati) { + DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dati, 16, "dati ${reg}, ${reg}, {imm}"), "dati"); +} + +TEST_F(AssemblerMIPS64Test, Sb) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sb, -16, "sb ${reg1}, {imm}(${reg2})"), "sb"); +} + +TEST_F(AssemblerMIPS64Test, Sh) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sh, -16, "sh ${reg1}, {imm}(${reg2})"), "sh"); +} + +TEST_F(AssemblerMIPS64Test, Sw) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sw, -16, "sw ${reg1}, {imm}(${reg2})"), "sw"); +} + +TEST_F(AssemblerMIPS64Test, Sd) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sd, -16, "sd ${reg1}, {imm}(${reg2})"), "sd"); +} + +TEST_F(AssemblerMIPS64Test, Slt) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Slt, "slt ${reg1}, ${reg2}, ${reg3}"), "slt"); +} + +TEST_F(AssemblerMIPS64Test, Sltu) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Sltu, "sltu ${reg1}, ${reg2}, ${reg3}"), "sltu"); +} + +TEST_F(AssemblerMIPS64Test, Slti) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Slti, -16, "slti ${reg1}, ${reg2}, {imm}"), + "slti"); +} + +TEST_F(AssemblerMIPS64Test, Sltiu) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sltiu, -16, "sltiu ${reg1}, ${reg2}, {imm}"), + "sltiu"); +} + +TEST_F(AssemblerMIPS64Test, Move) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Move, "or ${reg1}, ${reg2}, $zero"), "move"); +} + +TEST_F(AssemblerMIPS64Test, Clear) 
{ + DriverStr(RepeatR(&mips64::Mips64Assembler::Clear, "or ${reg}, $zero, $zero"), "clear"); +} + +TEST_F(AssemblerMIPS64Test, Not) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Not, "nor ${reg1}, ${reg2}, $zero"), "not"); +} + TEST_F(AssemblerMIPS64Test, Bitswap) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -889,6 +1452,22 @@ TEST_F(AssemblerMIPS64Test, Dinsu) { DriverStr(expected.str(), "Dinsu"); } +TEST_F(AssemblerMIPS64Test, Lsa) { + DriverStr(RepeatRRRIb(&mips64::Mips64Assembler::Lsa, + 2, + "lsa ${reg1}, ${reg2}, ${reg3}, {imm}", + 1), + "lsa"); +} + +TEST_F(AssemblerMIPS64Test, Dlsa) { + DriverStr(RepeatRRRIb(&mips64::Mips64Assembler::Dlsa, + 2, + "dlsa ${reg1}, ${reg2}, ${reg3}, {imm}", + 1), + "dlsa"); +} + TEST_F(AssemblerMIPS64Test, Wsbh) { DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh"); } @@ -962,6 +1541,18 @@ TEST_F(AssemblerMIPS64Test, Dsra32) { "dsra32"); } +TEST_F(AssemblerMIPS64Test, Dsllv) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsllv, "dsllv ${reg1}, ${reg2}, ${reg3}"), "dsllv"); +} + +TEST_F(AssemblerMIPS64Test, Dsrlv) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsrlv, "dsrlv ${reg1}, ${reg2}, ${reg3}"), "dsrlv"); +} + +TEST_F(AssemblerMIPS64Test, Dsrav) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Dsrav, "dsrav ${reg1}, ${reg2}, ${reg3}"), "dsrav"); +} + TEST_F(AssemblerMIPS64Test, Sc) { DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc"); } @@ -1018,6 +1609,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE); + __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF); + __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000001); __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0); @@ -1032,6 +1627,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE); + __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF); + __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000001); __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0); @@ -1046,6 +1645,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ LoadFromOffset(mips64::kLoadSignedHalfword, 
mips64::A0, mips64::A1, 0x7FFFFFFE); + __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000002); __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0); @@ -1060,6 +1663,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFE); + __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000002); __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0); @@ -1074,6 +1681,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFF8); + __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000004); __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0); @@ -1088,6 +1699,10 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFF8); + __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000004); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0); @@ -1098,10 +1713,15 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000); + __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x27FFC); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768); __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00); + __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8); + __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ 
LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000000); + __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000004); const char* expected = "lb $a0, 0($a0)\n" @@ -1110,25 +1730,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lb $a0, 256($a1)\n" "lb $a0, 1000($a1)\n" "lb $a0, 0x7FFF($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lb $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lb $a0, 1($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lb $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lb $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lb $a0, 9($at)\n" + "daui $at, $a1, 1\n" "lb $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lb $a0, 0x5678($at)\n" "lb $a0, -256($a1)\n" "lb $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lb $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lb $a0, -2($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lb $a0, -1($at)\n" + "daui $at, $a1, 32768\n" "lb $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lb $a0, 1($at)\n" "lbu $a0, 0($a0)\n" "lbu $a0, 0($a1)\n" @@ -1136,25 +1759,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lbu $a0, 256($a1)\n" "lbu $a0, 1000($a1)\n" "lbu $a0, 0x7FFF($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lbu $a0, 1($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lbu $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lbu $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lbu $a0, 9($at)\n" + "daui $at, $a1, 1\n" "lbu $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lbu $a0, 0x5678($at)\n" "lbu $a0, -256($a1)\n" "lbu $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lbu $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lbu $a0, -2($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lbu $a0, -1($at)\n" + "daui $at, $a1, 32768\n" "lbu $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lbu $a0, 1($at)\n" "lh $a0, 0($a0)\n" "lh $a0, 0($a1)\n" @@ -1162,25 +1788,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lh $a0, 256($a1)\n" "lh $a0, 1000($a1)\n" "lh $a0, 0x7FFE($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lh $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lh $a0, 2($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lh $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lh $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lh $a0, 10($at)\n" + "daui $at, $a1, 1\n" "lh $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lh $a0, 0x5678($at)\n" "lh $a0, -256($a1)\n" "lh $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lh $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lh $a0, -4($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lh $a0, -2($at)\n" + "daui $at, $a1, 32768\n" "lh $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lh $a0, 2($at)\n" "lhu $a0, 0($a0)\n" "lhu $a0, 0($a1)\n" @@ -1188,25 +1817,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lhu $a0, 256($a1)\n" "lhu $a0, 1000($a1)\n" "lhu $a0, 
0x7FFE($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lhu $a0, 2($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lhu $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lhu $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lhu $a0, 10($at)\n" + "daui $at, $a1, 1\n" "lhu $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lhu $a0, 0x5678($at)\n" "lhu $a0, -256($a1)\n" "lhu $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lhu $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lhu $a0, -4($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lhu $a0, -2($at)\n" + "daui $at, $a1, 32768\n" "lhu $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lhu $a0, 2($at)\n" "lw $a0, 0($a0)\n" "lw $a0, 0($a1)\n" @@ -1214,25 +1846,28 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lw $a0, 256($a1)\n" "lw $a0, 1000($a1)\n" "lw $a0, 0x7FFC($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lw $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lw $a0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lw $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lw $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lw $a0, 12($at)\n" + "daui $at, $a1, 1\n" "lw $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lw $a0, 0x5678($at)\n" "lw $a0, -256($a1)\n" "lw $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lw $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lw $a0, -8($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lw $a0, -4($at)\n" + "daui $at, $a1, 32768\n" "lw $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lw $a0, 4($at)\n" "lwu $a0, 0($a0)\n" "lwu $a0, 0($a1)\n" @@ -1240,59 +1875,73 @@ TEST_F(AssemblerMIPS64Test, LoadFromOffset) { "lwu $a0, 256($a1)\n" "lwu $a0, 1000($a1)\n" "lwu $a0, 0x7FFC($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lwu $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "lwu $a0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "lwu $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "lwu $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lwu $a0, 12($at)\n" + "daui $at, $a1, 1\n" "lwu $a0, 0($at)\n" + "daui $at, $a1, 0x1234\n" + "lwu $a0, 0x5678($at)\n" "lwu $a0, -256($a1)\n" "lwu $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" + "daui $at, $a1, 0xABCE\n" + "lwu $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lwu $a0, -8($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lwu $a0, -4($at)\n" + "daui $at, $a1, 32768\n" "lwu $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lwu $a0, 4($at)\n" "ld $a0, 0($a0)\n" "ld $a0, 0($a1)\n" "lwu $a0, 4($a1)\n" "lwu $t3, 8($a1)\n" - "dins $a0, $t3, 32, 32\n" + "dinsu $a0, $t3, 32, 32\n" "ld $a0, 256($a1)\n" "ld $a0, 1000($a1)\n" - "ori $at, $zero, 0x7FF8\n" - "daddu $at, $at, $a1\n" - "lwu $a0, 4($at)\n" - "lwu $t3, 8($at)\n" - "dins $a0, $t3, 32, 32\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "ld $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 32760\n" "lwu $a0, 
4($at)\n" "lwu $t3, 8($at)\n" - "dins $a0, $t3, 32, 32\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "ld $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "dinsu $a0, $t3, 32, 32\n" + "daddiu $at, $a1, 32760\n" + "ld $a0, 8($at)\n" + "daddiu $at, $a1, 32760\n" + "lwu $a0, 12($at)\n" + "lwu $t3, 16($at)\n" + "dinsu $a0, $t3, 32, 32\n" + "daui $at, $a1, 1\n" "ld $a0, 0($at)\n" + "daui $at, $a1, 2\n" + "daddiu $at, $at, 8\n" + "lwu $a0, 0x7ff4($at)\n" + "lwu $t3, 0x7ff8($at)\n" + "dinsu $a0, $t3, 32, 32\n" + "daui $at, $a1, 0x1234\n" + "ld $a0, 0x5678($at)\n" "ld $a0, -256($a1)\n" "ld $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" - "ld $a0, 0($at)\n"; + "daui $at, $a1, 0xABCE\n" + "ld $a0, -4352($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "ld $a0, -8($at)\n" + "daui $at, $a1, 32768\n" + "dahi $at, $at, 1\n" + "lwu $a0, -4($at)\n" + "lwu $t3, 0($at)\n" + "dinsu $a0, $t3, 32, 32\n" + "daui $at, $a1, 32768\n" + "ld $a0, 0($at)\n" + "daui $at, $a1, 32768\n" + "lwu $a0, 4($at)\n" + "lwu $t3, 8($at)\n" + "dinsu $a0, $t3, 32, 32\n"; DriverStr(expected, "LoadFromOffset"); } @@ -1326,57 +1975,42 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) { "lwc1 $f0, 4($a0)\n" "lwc1 $f0, 256($a0)\n" "lwc1 $f0, 0x7FFC($a0)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 12($at)\n" + "daui $at, $a0, 1\n" "lwc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "lwc1 $f0, 22136($at) # 0x5678\n" "lwc1 $f0, -256($a0)\n" "lwc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 0($at)\n" + "daui $at, $a0, 0xABCE\n" + "lwc1 $f0, -0x1100($at) # 0xEF00\n" "ldc1 $f0, 0($a0)\n" "lwc1 $f0, 4($a0)\n" "lw $t3, 8($a0)\n" "mthc1 $t3, $f0\n" "ldc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x7FF8\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" "lwc1 $f0, 4($at)\n" "lw $t3, 8($at)\n" "mthc1 $t3, $f0\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "lwc1 $f0, 4($at)\n" - "lw $t3, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "ldc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "lwc1 $f0, 12($at)\n" + "lw $t3, 16($at)\n" "mthc1 $t3, $f0\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "daui $at, $a0, 1\n" "ldc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "ldc1 $f0, 22136($at) # 0x5678\n" "ldc1 $f0, -256($a0)\n" "ldc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "ldc1 $f0, 0($at)\n"; + "daui $at, $a0, 0xABCE\n" + "ldc1 $f0, -0x1100($at) # 0xEF00\n"; DriverStr(expected, "LoadFpuFromOffset"); } @@ -1436,6 +2070,10 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256); __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768); __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8); + __ 
StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000000); + __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000004); const char* expected = "sb $a0, 0($a0)\n" @@ -1444,25 +2082,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { "sb $a0, 256($a1)\n" "sb $a0, 1000($a1)\n" "sb $a0, 0x7FFF($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sb $a0, 1($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "sb $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "sb $a0, 8($at)\n" + "daddiu $at, $a1, 0x7FF8\n" + "sb $a0, 9($at)\n" + "daui $at, $a1, 1\n" "sb $a0, 0($at)\n" + "daui $at, $a1, 4660 # 0x1234\n" + "sb $a0, 22136($at) # 0x5678\n" "sb $a0, -256($a1)\n" "sb $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" - "sb $a0, 0($at)\n" + "daui $at, $a1, 43982 # 0xABCE\n" + "sb $a0, -4352($at) # 0xEF00\n" "sh $a0, 0($a0)\n" "sh $a0, 0($a1)\n" @@ -1470,25 +2101,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { "sh $a0, 256($a1)\n" "sh $a0, 1000($a1)\n" "sh $a0, 0x7FFE($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sh $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sh $a0, 2($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "sh $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "sh $a0, 8($at)\n" + "daddiu $at, $a1, 0x7FF8\n" + "sh $a0, 10($at)\n" + "daui $at, $a1, 1\n" "sh $a0, 0($at)\n" + "daui $at, $a1, 4660 # 0x1234\n" + "sh $a0, 22136($at) # 0x5678\n" "sh $a0, -256($a1)\n" "sh $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" - "sh $a0, 0($at)\n" + "daui $at, $a1, 43982 # 0xABCE\n" + "sh $a0, -4352($at) # 0xEF00\n" "sw $a0, 0($a0)\n" "sw $a0, 0($a1)\n" @@ -1496,25 +2120,18 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { "sw $a0, 256($a1)\n" "sw $a0, 1000($a1)\n" "sw $a0, 0x7FFC($a1)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sw $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sw $a0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a1\n" - "sw $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" + "sw $a0, 8($at)\n" + "daddiu $at, $a1, 0x7FF8\n" + "sw $a0, 12($at)\n" + "daui $at, $a1, 1\n" "sw $a0, 0($at)\n" + "daui $at, $a1, 4660 # 0x1234\n" + "sw $a0, 22136($at) # 0x5678\n" "sw $a0, -256($a1)\n" "sw $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" - "sw $a0, 0($at)\n" + "daui $at, $a1, 43982 # 0xABCE\n" + "sw $a0, -4352($at) # 0xEF00\n" "sd $a0, 0($a0)\n" "sd $a0, 0($a1)\n" @@ -1523,32 +2140,38 @@ TEST_F(AssemblerMIPS64Test, StoreToOffset) { "sw $t3, 8($a1)\n" "sd $a0, 256($a1)\n" "sd $a0, 1000($a1)\n" - "ori $at, $zero, 0x7FF8\n" - "daddu $at, $at, $a1\n" + "daddiu $at, $a1, 0x7FF8\n" "sw $a0, 4($at)\n" "dsrl32 $t3, $a0, 0\n" "sw $t3, 8($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sd $a0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a1\n" - "sw $a0, 4($at)\n" + "daddiu $at, $a1, 32760 # 0x7FF8\n" + "sd $a0, 8($at)\n" + "daddiu $at, $a1, 32760 # 0x7FF8\n" + "sw $a0, 12($at)\n" "dsrl32 $t3, $a0, 0\n" - "sw $t3, 8($at)\n" - "lui $at, 1\n" - "daddu $at, $at, 
$a1\n" - "sd $a0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a1\n" + "sw $t3, 16($at)\n" + "daui $at, $a1, 1\n" "sd $a0, 0($at)\n" + "daui $at, $a1, 4660 # 0x1234\n" + "sd $a0, 22136($at) # 0x5678\n" "sd $a0, -256($a1)\n" "sd $a0, -32768($a1)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a1\n" - "sd $a0, 0($at)\n"; + "daui $at, $a1, 0xABCE\n" + "sd $a0, -0x1100($at)\n" + "daui $at, $a1, 0x8000\n" + "dahi $at, $at, 1\n" + "sd $a0, -8($at)\n" + "daui $at, $a1, 0x8000\n" + "dahi $at, $at, 1\n" + "sw $a0, -4($at) # 0xFFFC\n" + "dsrl32 $t3, $a0, 0\n" + "sw $t3, 0($at) # 0x0\n" + "daui $at, $a1, 0x8000\n" + "sd $a0, 0($at) # 0x0\n" + "daui $at, $a1, 0x8000\n" + "sw $a0, 4($at) # 0x4\n" + "dsrl32 $t3, $a0, 0\n" + "sw $t3, 8($at) # 0x8\n"; DriverStr(expected, "StoreToOffset"); } @@ -1582,60 +2205,691 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) { "swc1 $f0, 4($a0)\n" "swc1 $f0, 256($a0)\n" "swc1 $f0, 0x7FFC($a0)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "swc1 $f0, 4($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "swc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "swc1 $f0, 12($at)\n" + "daui $at, $a0, 1\n" "swc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "swc1 $f0, 22136($at) # 0x5678\n" "swc1 $f0, -256($a0)\n" "swc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "swc1 $f0, 0($at)\n" + "daui $at, $a0, 0xABCE\n" + "swc1 $f0, -0x1100($at)\n" "sdc1 $f0, 0($a0)\n" "mfhc1 $t3, $f0\n" "swc1 $f0, 4($a0)\n" "sw $t3, 8($a0)\n" "sdc1 $f0, 256($a0)\n" - "ori $at, $zero, 0x7FF8\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" "mfhc1 $t3, $f0\n" "swc1 $f0, 4($at)\n" "sw $t3, 8($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" - "sdc1 $f0, 0($at)\n" - "ori $at, $zero, 0x8000\n" - "daddu $at, $at, $a0\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" + "sdc1 $f0, 8($at)\n" + "daddiu $at, $a0, 32760 # 0x7FF8\n" "mfhc1 $t3, $f0\n" - "swc1 $f0, 4($at)\n" - "sw $t3, 8($at)\n" - "lui $at, 1\n" - "daddu $at, $at, $a0\n" - "sdc1 $f0, 0($at)\n" - "lui $at, 0x1234\n" - "ori $at, 0x5678\n" - "daddu $at, $at, $a0\n" + "swc1 $f0, 12($at)\n" + "sw $t3, 16($at)\n" + "daui $at, $a0, 1\n" "sdc1 $f0, 0($at)\n" + "daui $at, $a0, 4660 # 0x1234\n" + "sdc1 $f0, 22136($at) # 0x5678\n" "sdc1 $f0, -256($a0)\n" "sdc1 $f0, -32768($a0)\n" - "lui $at, 0xABCD\n" - "ori $at, 0xEF00\n" - "daddu $at, $at, $a0\n" - "sdc1 $f0, 0($at)\n"; + "daui $at, $a0, 0xABCE\n" + "sdc1 $f0, -0x1100($at)\n"; DriverStr(expected, "StoreFpuToOffset"); } +TEST_F(AssemblerMIPS64Test, StoreConstToOffset) { + __ StoreConstToOffset(mips64::kStoreByte, 0xFF, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreHalfword, 0xFFFF, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x123456789ABCDEF0, mips64::A1, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreByte, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreHalfword, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0, mips64::A1, +0, mips64::T8); + + __ 
StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567812345678, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567800000000, mips64::A1, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreDoubleword, 0x0000000012345678, mips64::A1, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, +0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, -0xFFF0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0xFFF0, mips64::T8); + + __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, -0xFFF0, mips64::T8); + __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0xFFF0, mips64::T8); + + const char* expected = + "ori $t8, $zero, 0xFF\n" + "sb $t8, 0($a1)\n" + "ori $t8, $zero, 0xFFFF\n" + "sh $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8,0x5678\n" + "sw $t8, 0($a1)\n" + "lui $t8, 0x9abc\n" + "ori $t8, $t8,0xdef0\n" + "dahi $t8, $t8, 0x5679\n" + "dati $t8, $t8, 0x1234\n" + "sd $t8, 0($a1)\n" + "sb $zero, 0($a1)\n" + "sh $zero, 0($a1)\n" + "sw $zero, 0($a1)\n" + "sd $zero, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8,0x5678\n" + "dins $t8, $t8, 0x20, 0x20\n" + "sd $t8, 0($a1)\n" + "lui $t8, 0x246\n" + "ori $t8, $t8, 0x8acf\n" + "dsll32 $t8, $t8, 0x3\n" + "sd $t8, 0($a1)\n" + "lui $t8, 0x1234\n" + "ori $t8, $t8, 0x5678\n" + "sd $t8, 0($a1)\n" + "sw $zero, 0($t8)\n" + "lui $at,0x1234\n" + "ori $at, $at, 0x5678\n" + "sw $at, 0($t8)\n" + "daddiu $at, $a1, -32760 # 0x8008\n" + "sw $zero, -32760($at) # 0x8008\n" + "daddiu $at, $a1, 32760 # 0x7FF8\n" + "lui $t8, 4660 # 0x1234\n" + "ori $t8, $t8, 22136 # 0x5678\n" + "sw $t8, 32760($at) # 0x7FF8\n" + "daddiu $at, $t8, -32760 # 0x8008\n" + "sw $zero, -32760($at) # 0x8008\n" + "daddiu $at, $t8, 32760 # 0x7FF8\n" + "lui $t8, 4660 # 0x1234\n" + "ori $t8, $t8, 22136 # 0x5678\n" + "sw $t8, 32760($at) # 0x7FF8\n"; + DriverStr(expected, "StoreConstToOffset"); +} +////////////////////////////// +// Loading/adding Constants // +////////////////////////////// + +TEST_F(AssemblerMIPS64Test, LoadConst32) { + // IsUint<16>(value) + __ LoadConst32(mips64::V0, 0); + __ LoadConst32(mips64::V0, 65535); + // IsInt<16>(value) + __ LoadConst32(mips64::V0, -1); + __ LoadConst32(mips64::V0, -32768); + // Everything else + __ LoadConst32(mips64::V0, 65536); + __ LoadConst32(mips64::V0, 65537); + __ LoadConst32(mips64::V0, 2147483647); + __ LoadConst32(mips64::V0, -32769); + __ LoadConst32(mips64::V0, -65536); + __ LoadConst32(mips64::V0, -65537); + __ LoadConst32(mips64::V0, -2147483647); + __ LoadConst32(mips64::V0, -2147483648); + + const char* expected = + // IsUint<16>(value) + "ori $v0, $zero, 0\n" // __ LoadConst32(mips64::V0, 0); + "ori $v0, $zero, 65535\n" // __ LoadConst32(mips64::V0, 65535); + // IsInt<16>(value) + "addiu $v0, $zero, -1\n" // __ LoadConst32(mips64::V0, -1); + "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips64::V0, -32768); + // Everything else + "lui $v0, 1\n" // __ LoadConst32(mips64::V0, 65536); + "lui $v0, 1\n" // __ LoadConst32(mips64::V0, 65537); + "ori $v0, 1\n" // " + "lui $v0, 32767\n" // __ LoadConst32(mips64::V0, 2147483647); + "ori $v0, 65535\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips64::V0, -32769); + "ori $v0, 32767\n" // " + "lui $v0, 65535\n" // __ LoadConst32(mips64::V0, -65536); + "lui $v0, 65534\n" // __ LoadConst32(mips64::V0, -65537); + "ori $v0, 65535\n" // " + "lui $v0, 32768\n" // __ 
LoadConst32(mips64::V0, -2147483647); + "ori $v0, 1\n" // " + "lui $v0, 32768\n"; // __ LoadConst32(mips64::V0, -2147483648); + DriverStr(expected, "LoadConst32"); +} + +TEST_F(AssemblerMIPS64Test, Addiu32) { + __ Addiu32(mips64::A1, mips64::A2, -0x8000); + __ Addiu32(mips64::A1, mips64::A2, +0); + __ Addiu32(mips64::A1, mips64::A2, +0x7FFF); + __ Addiu32(mips64::A1, mips64::A2, -0x8001); + __ Addiu32(mips64::A1, mips64::A2, +0x8000); + __ Addiu32(mips64::A1, mips64::A2, -0x10000); + __ Addiu32(mips64::A1, mips64::A2, +0x10000); + __ Addiu32(mips64::A1, mips64::A2, +0x12345678); + + const char* expected = + "addiu $a1, $a2, -0x8000\n" + "addiu $a1, $a2, 0\n" + "addiu $a1, $a2, 0x7FFF\n" + "aui $a1, $a2, 0xFFFF\n" + "addiu $a1, $a1, 0x7FFF\n" + "aui $a1, $a2, 1\n" + "addiu $a1, $a1, -0x8000\n" + "aui $a1, $a2, 0xFFFF\n" + "aui $a1, $a2, 1\n" + "aui $a1, $a2, 0x1234\n" + "addiu $a1, $a1, 0x5678\n"; + DriverStr(expected, "Addiu32"); +} + +static uint64_t SignExtend16To64(uint16_t n) { + return static_cast<int16_t>(n); +} + +// The art::mips64::Mips64Assembler::LoadConst64() method uses a template +// to minimize the number of instructions needed to load a 64-bit constant +// value into a register. The template calls various methods which emit +// MIPS machine instructions. This struct (class) uses the same template +// but overrides the definitions of the methods which emit MIPS instructions +// to use methods which simulate the operation of the corresponding MIPS +// instructions. After invoking LoadConst64() the target register should +// contain the same 64-bit value as was input to LoadConst64(). If the +// simulated register doesn't contain the correct value then there is probably +// an error in the template function. +struct LoadConst64Tester { + LoadConst64Tester() { + // Initialize all of the registers for simulation to zero. + for (int r = 0; r < 32; r++) { + regs_[r] = 0; + } + // Clear all of the path flags. 
+ loadconst64_paths_ = art::mips64::kLoadConst64PathZero; + } + void Addiu(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) { + regs_[rd] = static_cast<int32_t>(regs_[rs] + SignExtend16To64(c)); + } + void Daddiu(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) { + regs_[rd] = regs_[rs] + SignExtend16To64(c); + } + void Dahi(mips64::GpuRegister rd, uint16_t c) { + regs_[rd] += SignExtend16To64(c) << 32; + } + void Dati(mips64::GpuRegister rd, uint16_t c) { + regs_[rd] += SignExtend16To64(c) << 48; + } + void Dinsu(mips64::GpuRegister rt, mips64::GpuRegister rs, int pos, int size) { + CHECK(IsUint<5>(pos - 32)) << pos; + CHECK(IsUint<5>(size - 1)) << size; + CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size; + uint64_t src_mask = (UINT64_C(1) << size) - 1; + uint64_t dsk_mask = ~(src_mask << pos); + + regs_[rt] = (regs_[rt] & dsk_mask) | ((regs_[rs] & src_mask) << pos); + } + void Dsll(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) { + regs_[rd] = regs_[rt] << (shamt & 0x1f); + } + void Dsll32(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) { + regs_[rd] = regs_[rt] << (32 + (shamt & 0x1f)); + } + void Dsrl(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) { + regs_[rd] = regs_[rt] >> (shamt & 0x1f); + } + void Dsrl32(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) { + regs_[rd] = regs_[rt] >> (32 + (shamt & 0x1f)); + } + void Lui(mips64::GpuRegister rd, uint16_t c) { + regs_[rd] = SignExtend16To64(c) << 16; + } + void Ori(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) { + regs_[rd] = regs_[rs] | c; + } + void LoadConst32(mips64::GpuRegister rd, int32_t c) { + CHECK_NE(rd, 0); + mips64::TemplateLoadConst32<LoadConst64Tester>(this, rd, c); + CHECK_EQ(regs_[rd], static_cast<uint64_t>(c)); + } + void LoadConst64(mips64::GpuRegister rd, int64_t c) { + CHECK_NE(rd, 0); + mips64::TemplateLoadConst64<LoadConst64Tester>(this, rd, c); + CHECK_EQ(regs_[rd], static_cast<uint64_t>(c)); + } + uint64_t regs_[32]; + + // Getter function for loadconst64_paths_. + int GetPathsCovered() { + return loadconst64_paths_; + } + + void RecordLoadConst64Path(int value) { + loadconst64_paths_ |= value; + } + + private: + // This variable holds a bitmask to tell us which paths were taken + // through the template function which loads 64-bit values. + int loadconst64_paths_; +}; + +TEST_F(AssemblerMIPS64Test, LoadConst64) { + const uint16_t imms[] = { + 0, 1, 2, 3, 4, 0x33, 0x66, 0x55, 0x99, 0xaa, 0xcc, 0xff, 0x5500, 0x5555, + 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0x8002, 0x8003, 0x8004, + 0xaaaa, 0xfffc, 0xfffd, 0xfffe, 0xffff + }; + unsigned d0, d1, d2, d3; + LoadConst64Tester tester; + + union { + int64_t v64; + uint16_t v16[4]; + } u; + + for (d3 = 0; d3 < sizeof imms / sizeof imms[0]; d3++) { + u.v16[3] = imms[d3]; + + for (d2 = 0; d2 < sizeof imms / sizeof imms[0]; d2++) { + u.v16[2] = imms[d2]; + + for (d1 = 0; d1 < sizeof imms / sizeof imms[0]; d1++) { + u.v16[1] = imms[d1]; + + for (d0 = 0; d0 < sizeof imms / sizeof imms[0]; d0++) { + u.v16[0] = imms[d0]; + + tester.LoadConst64(mips64::V0, u.v64); + } + } + } + } + + // Verify that we tested all paths through the "load 64-bit value" + // function template. + EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths); +} + +// MSA instructions. 
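(Aside, not part of the patch: a minimal standalone sketch of the simulate-and-verify idea used by LoadConst64Tester above. FakeCpu, MaterializeConst64 and the naive shift/or decomposition are hypothetical and far simpler than the real TemplateLoadConst64; only the testing pattern is illustrated — sweep "interesting" halfword values through every 16-bit slot of a 64-bit constant, run the emitted sequence through a tiny simulator, and check that the register ends up holding exactly the requested value. The real tester additionally records which template paths were taken via the kLoadConst64Path* flags and asserts full path coverage, which is omitted here.)

#include <cassert>
#include <cstdint>

// Hypothetical one-register "CPU": each method simulates one emitted instruction.
class FakeCpu {
 public:
  void LoadImm16(uint16_t imm) { reg_ = imm; }                    // reg = imm (zero-extended)
  void ShiftOrImm16(uint16_t imm) { reg_ = (reg_ << 16) | imm; }  // reg = (reg << 16) | imm
  uint64_t Value() const { return reg_; }

 private:
  uint64_t reg_ = 0;
};

// "Generator" under test: naive four-instruction materialization of a 64-bit
// constant. A real generator (like TemplateLoadConst64) picks much shorter
// sequences depending on the value; the verification scheme is the same.
void MaterializeConst64(FakeCpu* cpu, uint64_t value) {
  cpu->LoadImm16(static_cast<uint16_t>(value >> 48));
  cpu->ShiftOrImm16(static_cast<uint16_t>(value >> 32));
  cpu->ShiftOrImm16(static_cast<uint16_t>(value >> 16));
  cpu->ShiftOrImm16(static_cast<uint16_t>(value));
}

int main() {
  // Sweep boundary-ish halfword patterns through all four positions, in the
  // spirit of the imms[] loop in AssemblerMIPS64Test.LoadConst64 above.
  const uint16_t imms[] = {0, 1, 0x7FFF, 0x8000, 0x8001, 0xFFFE, 0xFFFF};
  for (uint16_t d3 : imms) {
    for (uint16_t d2 : imms) {
      for (uint16_t d1 : imms) {
        for (uint16_t d0 : imms) {
          const uint64_t value = (uint64_t{d3} << 48) | (uint64_t{d2} << 32) |
                                 (uint64_t{d1} << 16) | d0;
          FakeCpu cpu;
          MaterializeConst64(&cpu, value);
          // If the generator ever emits a wrong sequence, the simulated
          // register will not contain the requested constant.
          assert(cpu.Value() == value);
        }
      }
    }
  }
  return 0;
}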
+ +TEST_F(AssemblerMIPS64Test, AndV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v"); +} + +TEST_F(AssemblerMIPS64Test, OrV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v"); +} + +TEST_F(AssemblerMIPS64Test, NorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v"); +} + +TEST_F(AssemblerMIPS64Test, XorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v"); +} + +TEST_F(AssemblerMIPS64Test, AddvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), + "addv.b"); +} + +TEST_F(AssemblerMIPS64Test, AddvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), + "addv.h"); +} + +TEST_F(AssemblerMIPS64Test, AddvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), + "addv.w"); +} + +TEST_F(AssemblerMIPS64Test, AddvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), + "addv.d"); +} + +TEST_F(AssemblerMIPS64Test, SubvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), + "subv.b"); +} + +TEST_F(AssemblerMIPS64Test, SubvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), + "subv.h"); +} + +TEST_F(AssemblerMIPS64Test, SubvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), + "subv.w"); +} + +TEST_F(AssemblerMIPS64Test, SubvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), + "subv.d"); +} + +TEST_F(AssemblerMIPS64Test, MulvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"), + "mulv.b"); +} + +TEST_F(AssemblerMIPS64Test, MulvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), + "mulv.h"); +} + +TEST_F(AssemblerMIPS64Test, MulvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), + "mulv.w"); +} + +TEST_F(AssemblerMIPS64Test, MulvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), + "mulv.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"), + "div_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"), + "div_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"), + "div_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"), + "div_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"), + "div_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"), + "div_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"), + "div_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"), + "div_u.d"); 
+} + +TEST_F(AssemblerMIPS64Test, Mod_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"), + "mod_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"), + "mod_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"), + "mod_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"), + "mod_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"), + "mod_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"), + "mod_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"), + "mod_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"), + "mod_u.d"); +} + +TEST_F(AssemblerMIPS64Test, FaddW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), + "fadd.w"); +} + +TEST_F(AssemblerMIPS64Test, FaddD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), + "fadd.d"); +} + +TEST_F(AssemblerMIPS64Test, FsubW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), + "fsub.w"); +} + +TEST_F(AssemblerMIPS64Test, FsubD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), + "fsub.d"); +} + +TEST_F(AssemblerMIPS64Test, FmulW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), + "fmul.w"); +} + +TEST_F(AssemblerMIPS64Test, FmulD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), + "fmul.d"); +} + +TEST_F(AssemblerMIPS64Test, FdivW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), + "fdiv.w"); +} + +TEST_F(AssemblerMIPS64Test, FdivD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), + "fdiv.d"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), + "ffint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), + "ffint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), + "ftint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), + "ftint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, SllB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b"); +} + +TEST_F(AssemblerMIPS64Test, SllH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h"); +} + +TEST_F(AssemblerMIPS64Test, SllW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w"); +} + +TEST_F(AssemblerMIPS64Test, SllD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllD, "sll.d ${reg1}, ${reg2}, 
${reg3}"), "sll.d"); +} + +TEST_F(AssemblerMIPS64Test, SraB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b"); +} + +TEST_F(AssemblerMIPS64Test, SraH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h"); +} + +TEST_F(AssemblerMIPS64Test, SraW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w"); +} + +TEST_F(AssemblerMIPS64Test, SraD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d"); +} + +TEST_F(AssemblerMIPS64Test, SrlB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b"); +} + +TEST_F(AssemblerMIPS64Test, SrlH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h"); +} + +TEST_F(AssemblerMIPS64Test, SrlW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w"); +} + +TEST_F(AssemblerMIPS64Test, SrlD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d"); +} + +TEST_F(AssemblerMIPS64Test, SlliB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), + "slli.b"); +} + +TEST_F(AssemblerMIPS64Test, SlliH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), + "slli.h"); +} + +TEST_F(AssemblerMIPS64Test, SlliW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), + "slli.w"); +} + +TEST_F(AssemblerMIPS64Test, SlliD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), + "slli.d"); +} + +TEST_F(AssemblerMIPS64Test, MoveV) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v"); +} + +TEST_F(AssemblerMIPS64Test, SplatiB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"), + "splati.b"); +} + +TEST_F(AssemblerMIPS64Test, SplatiH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"), + "splati.h"); +} + +TEST_F(AssemblerMIPS64Test, SplatiW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"), + "splati.w"); +} + +TEST_F(AssemblerMIPS64Test, SplatiD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"), + "splati.d"); +} + +TEST_F(AssemblerMIPS64Test, FillB) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b"); +} + +TEST_F(AssemblerMIPS64Test, FillH) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h"); +} + +TEST_F(AssemblerMIPS64Test, FillW) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); +} + +TEST_F(AssemblerMIPS64Test, FillD) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); +} + +TEST_F(AssemblerMIPS64Test, LdiB) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); +} + +TEST_F(AssemblerMIPS64Test, LdiH) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h"); +} + +TEST_F(AssemblerMIPS64Test, LdiW) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w"); +} + +TEST_F(AssemblerMIPS64Test, LdiD) { + 
DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d"); +} + +TEST_F(AssemblerMIPS64Test, LdB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); +} + +TEST_F(AssemblerMIPS64Test, LdH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2), + "ld.h"); +} + +TEST_F(AssemblerMIPS64Test, LdW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4), + "ld.w"); +} + +TEST_F(AssemblerMIPS64Test, LdD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8), + "ld.d"); +} + +TEST_F(AssemblerMIPS64Test, StB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b"); +} + +TEST_F(AssemblerMIPS64Test, StH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2), + "st.h"); +} + +TEST_F(AssemblerMIPS64Test, StW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4), + "st.w"); +} + +TEST_F(AssemblerMIPS64Test, StD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8), + "st.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index f57498d34f..bc8e40b437 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -51,8 +51,36 @@ enum InstructionFields { kFdShift = 6, kFdBits = 5, + kMsaOperationShift = 23, + kMsaELMOperationShift = 22, + kMsa2ROperationShift = 18, + kMsa2RFOperationShift = 17, + kDfShift = 21, + kDfMShift = 16, + kDf2RShift = 16, + kDfNShift = 16, + kWtShift = 16, + kWtBits = 5, + kWsShift = 11, + kWsBits = 5, + kWdShift = 6, + kWdBits = 5, + kS10Shift = 16, + kI10Shift = 11, + kS10MinorShift = 2, + kBranchOffsetMask = 0x0000ffff, kJumpOffsetMask = 0x03ffffff, + kMsaMajorOpcode = 0x1e, + kMsaDfMByteMask = 0x70, + kMsaDfMHalfwordMask = 0x60, + kMsaDfMWordMask = 0x40, + kMsaDfMDoublewordMask = 0x00, + kMsaDfNByteMask = 0x00, + kMsaDfNHalfwordMask = 0x20, + kMsaDfNWordMask = 0x30, + kMsaDfNDoublewordMask = 0x38, + kMsaS10Mask = 0x3ff, }; enum ScaleFactor { diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc index dea396e4a7..42d061ec15 100644 --- a/compiler/utils/mips64/managed_register_mips64.cc +++ b/compiler/utils/mips64/managed_register_mips64.cc @@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const { CHECK(IsValidManagedRegister()); CHECK(other.IsValidManagedRegister()); if (Equals(other)) return true; + if (IsFpuRegister() && other.IsVectorRegister()) { + return (AsFpuRegister() == other.AsOverlappingFpuRegister()); + } else if (IsVectorRegister() && other.IsFpuRegister()) { + return (AsVectorRegister() == other.AsOverlappingVectorRegister()); + } return false; } @@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const { os << "GPU: " << static_cast<int>(AsGpuRegister()); } else if (IsFpuRegister()) { os << "FpuRegister: " << static_cast<int>(AsFpuRegister()); + } else if (IsVectorRegister()) { + os << "VectorRegister: " << static_cast<int>(AsVectorRegister()); } else { os << "??: " << RegId(); } diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index 1d36128a09..3980199b1e 100644 --- 
a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -30,36 +30,73 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters; const int kNumberOfFpuRegIds = kNumberOfFpuRegisters; const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters; -const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds; -const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds; - -// An instance of class 'ManagedRegister' represents a single GPU register (enum -// Register) or a double precision FP register (enum FpuRegister) +const int kNumberOfVecRegIds = kNumberOfVectorRegisters; +const int kNumberOfVecAllocIds = kNumberOfVectorRegisters; + +const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds; +const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds; + +// Register ids map: +// [0..R[ core registers (enum GpuRegister) +// [R..F[ floating-point registers (enum FpuRegister) +// [F..W[ MSA vector registers (enum VectorRegister) +// where +// R = kNumberOfGpuRegIds +// F = R + kNumberOfFpuRegIds +// W = F + kNumberOfVecRegIds + +// An instance of class 'ManagedRegister' represents a single Mips64 register. +// A register can be one of the following: +// * core register (enum GpuRegister) +// * floating-point register (enum FpuRegister) +// * MSA vector register (enum VectorRegister) +// // 'ManagedRegister::NoRegister()' provides an invalid register. // There is a one-to-one mapping between ManagedRegister and register id. class Mips64ManagedRegister : public ManagedRegister { public: - GpuRegister AsGpuRegister() const { + constexpr GpuRegister AsGpuRegister() const { CHECK(IsGpuRegister()); return static_cast<GpuRegister>(id_); } - FpuRegister AsFpuRegister() const { + constexpr FpuRegister AsFpuRegister() const { CHECK(IsFpuRegister()); return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds); } - bool IsGpuRegister() const { + constexpr VectorRegister AsVectorRegister() const { + CHECK(IsVectorRegister()); + return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters)); + } + + constexpr FpuRegister AsOverlappingFpuRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<FpuRegister>(AsVectorRegister()); + } + + constexpr VectorRegister AsOverlappingVectorRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<VectorRegister>(AsFpuRegister()); + } + + constexpr bool IsGpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfGpuRegIds); } - bool IsFpuRegister() const { + constexpr bool IsFpuRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfGpuRegIds; return (0 <= test) && (test < kNumberOfFpuRegIds); } + constexpr bool IsVectorRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds); + return (0 <= test) && (test < kNumberOfVecRegIds); + } + void Print(std::ostream& os) const; // Returns true if the two managed-registers ('this' and 'other') overlap. @@ -67,22 +104,27 @@ class Mips64ManagedRegister : public ManagedRegister { // then false is returned. 
bool Overlaps(const Mips64ManagedRegister& other) const; - static Mips64ManagedRegister FromGpuRegister(GpuRegister r) { + static constexpr Mips64ManagedRegister FromGpuRegister(GpuRegister r) { CHECK_NE(r, kNoGpuRegister); return FromRegId(r); } - static Mips64ManagedRegister FromFpuRegister(FpuRegister r) { + static constexpr Mips64ManagedRegister FromFpuRegister(FpuRegister r) { CHECK_NE(r, kNoFpuRegister); return FromRegId(r + kNumberOfGpuRegIds); } + static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) { + CHECK_NE(r, kNoVectorRegister); + return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds); + } + private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } - int RegId() const { + constexpr int RegId() const { CHECK(!IsNoRegister()); return id_; } @@ -98,9 +140,9 @@ class Mips64ManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr Mips64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static Mips64ManagedRegister FromRegId(int reg_id) { + static constexpr Mips64ManagedRegister FromRegId(int reg_id) { Mips64ManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -111,7 +153,7 @@ std::ostream& operator<<(std::ostream& os, const Mips64ManagedRegister& reg); } // namespace mips64 -inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const { +constexpr inline mips64::Mips64ManagedRegister ManagedRegister::AsMips64() const { mips64::Mips64ManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg; diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc new file mode 100644 index 0000000000..8b72d7e61d --- /dev/null +++ b/compiler/utils/mips64/managed_register_mips64_test.cc @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "managed_register_mips64.h" +#include "globals.h" +#include "gtest/gtest.h" + +namespace art { +namespace mips64 { + +TEST(Mips64ManagedRegister, NoRegister) { + Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64(); + EXPECT_TRUE(reg.IsNoRegister()); + EXPECT_FALSE(reg.Overlaps(reg)); +} + +TEST(Mips64ManagedRegister, GpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(ZERO, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(AT); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(AT, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(V0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(V0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A7, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T3); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T3, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(S0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(GP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(GP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(SP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(SP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(RA, reg.AsGpuRegister()); +} + +TEST(Mips64ManagedRegister, FpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F0, reg.AsFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + 
EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + + reg = Mips64ManagedRegister::FromFpuRegister(F1); + vreg = Mips64ManagedRegister::FromVectorRegister(W1); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F1, reg.AsFpuRegister()); + EXPECT_EQ(W1, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + + reg = Mips64ManagedRegister::FromFpuRegister(F20); + vreg = Mips64ManagedRegister::FromVectorRegister(W20); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F20, reg.AsFpuRegister()); + EXPECT_EQ(W20, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + vreg = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F31, reg.AsFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); +} + +TEST(Mips64ManagedRegister, VectorRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0); + Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W0, reg.AsVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + reg = Mips64ManagedRegister::FromVectorRegister(W2); + freg = Mips64ManagedRegister::FromFpuRegister(F2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W2, reg.AsVectorRegister()); + EXPECT_EQ(F2, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2))); + + reg = Mips64ManagedRegister::FromVectorRegister(W13); + freg = Mips64ManagedRegister::FromFpuRegister(F13); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W13, reg.AsVectorRegister()); + EXPECT_EQ(F13, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13))); + + reg = Mips64ManagedRegister::FromVectorRegister(W29); + freg = Mips64ManagedRegister::FromFpuRegister(F29); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W29, reg.AsVectorRegister()); + EXPECT_EQ(F29, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29))); +} + +TEST(Mips64ManagedRegister, Equals) { + ManagedRegister no_reg = ManagedRegister::NoRegister(); + EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister())); + 
EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_W0 = 
Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); + + Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +TEST(Mips64ManagedRegister, Overlaps) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F4); + reg_o = Mips64ManagedRegister::FromVectorRegister(W4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W4, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + 
EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F16); + reg_o = Mips64ManagedRegister::FromVectorRegister(W16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W16, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + reg_o = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W0); + reg_o = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W4); + reg_o = Mips64ManagedRegister::FromFpuRegister(F4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F4, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W16); + reg_o = Mips64ManagedRegister::FromFpuRegister(F16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F16, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W31); + reg_o = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F31, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + 
EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + 
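Editorial note, not part of the patch: the overlap assertions in this test (above and below this point) encode that an FPU register Fn aliases only the MSA vector register Wn with the same index, since on MIPS64 the vector registers share storage with the FPU registers, while GPU registers alias nothing but themselves. A minimal sketch of an Overlaps() predicate consistent with these expectations follows; the Is*/As*/Equals accessors are assumed from the surrounding test code and this is not the actual mips64 implementation.

bool Overlaps(Mips64ManagedRegister lhs, Mips64ManagedRegister rhs) {
  if (lhs.Equals(rhs)) {
    return true;  // Every register overlaps itself.
  }
  // An FPU register and the vector register with the same index share storage (e.g. F16 <-> W16).
  if (lhs.IsFpuRegister() && rhs.IsVectorRegister()) {
    return static_cast<int>(lhs.AsFpuRegister()) == static_cast<int>(rhs.AsVectorRegister());
  }
  if (lhs.IsVectorRegister() && rhs.IsFpuRegister()) {
    return static_cast<int>(lhs.AsVectorRegister()) == static_cast<int>(rhs.AsFpuRegister());
  }
  return false;  // GPU registers never alias FPU or vector registers.
}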
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/utils/string_reference.h b/compiler/utils/string_reference.h deleted file mode 100644 index 72552f21aa..0000000000 --- a/compiler/utils/string_reference.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_STRING_REFERENCE_H_ -#define ART_COMPILER_UTILS_STRING_REFERENCE_H_ - -#include <stdint.h> - -#include "base/logging.h" -#include "utf-inl.h" - -namespace art { - -class DexFile; - -// A string is uniquely located by its DexFile and the string_ids_ table index into that DexFile. -struct StringReference { - StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { } - - const DexFile* dex_file; - uint32_t string_index; -}; - -// Compare the actual referenced string values. Used for string reference deduplication. -struct StringReferenceValueComparator { - bool operator()(StringReference sr1, StringReference sr2) const { - // Note that we want to deduplicate identical strings even if they are referenced - // by different dex files, so we need some (any) total ordering of strings, rather - // than references. However, the references should usually be from the same dex file, - // so we choose the dex file string ordering so that we can simply compare indexes - // and avoid the costly string comparison in the most common case. - if (sr1.dex_file == sr2.dex_file) { - // Use the string order enforced by the dex file verifier. - DCHECK_EQ( - sr1.string_index < sr2.string_index, - CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues( - sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)), - sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0); - return sr1.string_index < sr2.string_index; - } else { - // Cannot compare indexes, so do the string comparison. 
- return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues( - sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)), - sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0; - } - } -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_STRING_REFERENCE_H_ diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc new file mode 100644 index 0000000000..90335eb048 --- /dev/null +++ b/compiler/utils/string_reference_test.cc @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_reference.h" + +#include <memory> + +#include "dex_file_types.h" +#include "gtest/gtest.h" +#include "utils/test_dex_file_builder.h" + +namespace art { + +TEST(StringReference, ValueComparator) { + // This is a regression test for the StringReferenceValueComparator using the wrong + // dex file to get the string data from a StringId. We construct two dex files with + // just a single string with the same length but different value. This creates dex + // files that have the same layout, so the byte offset read from the StringId in one + // dex file, when used in the other dex file still points to valid string data, except + // that it's the wrong string. Without the fix the strings would then compare equal. + TestDexFileBuilder builder1; + builder1.AddString("String1"); + std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1"); + ASSERT_EQ(1u, dex_file1->NumStringIds()); + ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(0)))); + StringReference sr1(dex_file1.get(), dex::StringIndex(0)); + + TestDexFileBuilder builder2; + builder2.AddString("String2"); + std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2"); + ASSERT_EQ(1u, dex_file2->NumStringIds()); + ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(0)))); + StringReference sr2(dex_file2.get(), dex::StringIndex(0)); + + StringReferenceValueComparator cmp; + EXPECT_TRUE(cmp(sr1, sr2)); // "String1" < "String2" is true. + EXPECT_FALSE(cmp(sr2, sr1)); // "String2" < "String1" is false. 
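Editorial note, not part of the patch: the regression scenario described in the test comment above comes from the comparator removed earlier in this diff, whose cross-dex-file branch resolves both StringIds through sr1.dex_file, so with two same-layout dex files the second string is read from the wrong file. A corrected cross-file comparison resolves each side through its own dex file, roughly as sketched below. This is only a sketch, not necessarily the relocated implementation; it uses the dex::StringIndex member the test now passes and omits the same-dex-file fast path of the original.

struct FixedStringReferenceValueComparator {
  bool operator()(StringReference sr1, StringReference sr2) const {
    // Resolve each StringId through its own dex file; the deleted code used sr1.dex_file for both.
    const char* s1 = sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index));
    const char* s2 = sr2.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index));
    return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(s1, s2) < 0;
  }
};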
+} + +TEST(StringReference, ValueComparator2) { + const char* const kDexFile1Strings[] = { + "", + "abc", + "abcxyz", + }; + const char* const kDexFile2Strings[] = { + "a", + "abc", + "abcdef", + "def", + }; + const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = { + { true, true, true, true }, + { false, false, true, true }, + { false, false, false, true }, + }; + const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = { + { false, true, true }, + { false, false, true }, + { false, false, true }, + { false, false, false }, + }; + + TestDexFileBuilder builder1; + for (const char* s : kDexFile1Strings) { + builder1.AddString(s); + } + std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1"); + ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds()); + for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) { + ASSERT_STREQ(kDexFile1Strings[index], + dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(index)))); + } + + TestDexFileBuilder builder2; + for (const char* s : kDexFile2Strings) { + builder2.AddString(s); + } + std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 1"); + ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds()); + for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) { + ASSERT_STREQ(kDexFile2Strings[index], + dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(index)))); + } + + StringReferenceValueComparator cmp; + for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) { + for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) { + StringReference sr1(dex_file1.get(), dex::StringIndex(index1)); + StringReference sr2(dex_file2.get(), dex::StringIndex(index2)); + EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2; + EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2; + } + } +} + +} // namespace art diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 1a8f567aa1..a1eb08e041 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -36,17 +36,17 @@ template <typename FreeBySizeSet> static void DumpFreeMap(const FreeBySizeSet& free_by_size) { size_t last_size = static_cast<size_t>(-1); for (const auto& entry : free_by_size) { - if (last_size != entry.first) { - last_size = entry.first; + if (last_size != entry.size) { + last_size = entry.size; LOG(INFO) << "Size " << last_size; } - LOG(INFO) << " 0x" << std::hex << entry.second->Start() - << " size=" << std::dec << entry.second->size; + LOG(INFO) << " 0x" << std::hex << entry.free_by_start_entry->Start() + << " size=" << std::dec << entry.free_by_start_entry->size; } } void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) { - auto free_by_start_pos = free_by_size_pos->second; + auto free_by_start_pos = free_by_size_pos->free_by_start_entry; free_by_size_.erase(free_by_size_pos); free_by_start_.erase(free_by_start_pos); } @@ -89,7 +89,7 @@ static size_t CollectFree(const FreeByStartSet& free_by_start, const FreeBySizeS // Calculate over free_by_size. size_t sum1 = 0; for (const auto& entry : free_by_size) { - sum1 += entry.second->size; + sum1 += entry.free_by_start_entry->size; } // Calculate over free_by_start. @@ -110,27 +110,52 @@ void* SwapSpace::Alloc(size_t size) { // Check the free list for something that fits. // TODO: Smarter implementation. Global biggest chunk, ... 
- SpaceChunk old_chunk; auto it = free_by_start_.empty() ? free_by_size_.end() : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); if (it != free_by_size_.end()) { - old_chunk = *it->second; - RemoveChunk(it); + auto entry = it->free_by_start_entry; + SpaceChunk old_chunk = *entry; + if (old_chunk.size == size) { + RemoveChunk(it); + } else { + // Try to avoid deallocating and allocating the std::set<> nodes. + // This would be much simpler if we could use replace() from Boost.Bimap. + + // The free_by_start_ map contains disjoint intervals ordered by the `ptr`. + // Shrinking the interval does not affect the ordering. + it->free_by_start_entry->ptr += size; + it->free_by_start_entry->size -= size; + + // The free_by_size_ map is ordered by the `size` and then `free_by_start_entry->ptr`. + // Adjusting the `ptr` above does not change that ordering but decreasing `size` can + // push the node before the previous node(s). + if (it == free_by_size_.begin()) { + it->size -= size; + } else { + auto prev = it; + --prev; + FreeBySizeEntry new_value(old_chunk.size - size, entry); + if (free_by_size_.key_comp()(*prev, new_value)) { + it->size -= size; + } else { + // Changing in place would break the std::set<> ordering, we need to remove and insert. + free_by_size_.erase(it); + free_by_size_.insert(new_value); + } + } + } + return old_chunk.ptr; } else { // Not a big enough free chunk, need to increase file size. - old_chunk = NewFileChunk(size); - } - - void* ret = old_chunk.ptr; - - if (old_chunk.size != size) { - // Insert the remainder. - SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size }; - InsertChunk(new_chunk); + SpaceChunk new_chunk = NewFileChunk(size); + if (new_chunk.size != size) { + // Insert the remainder. + SpaceChunk remainder = { new_chunk.ptr + size, new_chunk.size - size }; + InsertChunk(remainder); + } + return new_chunk.ptr; } - - return ret; } SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index bf06675d72..c286b820fe 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -45,8 +45,10 @@ class SwapSpace { private: // Chunk of space. struct SpaceChunk { - uint8_t* ptr; - size_t size; + // We need mutable members as we keep these objects in a std::set<> (providing only const + // access) but we modify these members while carefully preserving the std::set<> ordering. + mutable uint8_t* ptr; + mutable size_t size; uintptr_t Start() const { return reinterpret_cast<uintptr_t>(ptr); @@ -66,13 +68,21 @@ class SwapSpace { typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet; // Map size to an iterator to free_by_start_'s entry. - typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry; + struct FreeBySizeEntry { + FreeBySizeEntry(size_t sz, FreeByStartSet::const_iterator entry) + : size(sz), free_by_start_entry(entry) { } + + // We need mutable members as we keep these objects in a std::set<> (providing only const + // access) but we modify these members while carefully preserving the std::set<> ordering. 
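Editorial note, not part of the patch: a standalone illustration of the ordering problem the Alloc() rewrite above guards against. free_by_size_ is ordered by (size, start address), so shrinking a chunk's size in place can move its entry ahead of its predecessor, and in that case the node has to be erased and re-inserted. The example below models the check with a plain std::set of pairs; it is not ART code.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <set>
#include <utility>

int main() {
  // Model FreeBySizeEntry as (size, start address), ordered like FreeBySizeComparator.
  std::set<std::pair<size_t, uintptr_t>> free_by_size = {{8, 0x1000}, {16, 0x2000}};
  // Allocate 12 bytes: lower_bound({12, 0}) finds the 16-byte chunk at 0x2000.
  auto it = free_by_size.lower_bound({12, 0});
  std::pair<size_t, uintptr_t> shrunk{it->first - 12, it->second};  // (4, 0x2000)
  if (it == free_by_size.begin() || free_by_size.key_comp()(*std::prev(it), shrunk)) {
    // Still ordered after its predecessor: updating the size in place would be safe.
    std::printf("in-place shrink is safe\n");
  } else {
    // (4, 0x2000) now sorts before (8, 0x1000): must erase and re-insert, as Alloc() does.
    free_by_size.erase(it);
    free_by_size.insert(shrunk);
    std::printf("erase + insert needed\n");
  }
  for (const auto& e : free_by_size) {
    std::printf("size=%zu start=0x%zx\n", e.first, static_cast<size_t>(e.second));
  }
  return 0;
}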
+ mutable size_t size; + mutable FreeByStartSet::const_iterator free_by_start_entry; + }; struct FreeBySizeComparator { bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) { - if (lhs.first != rhs.first) { - return lhs.first < rhs.first; + if (lhs.size != rhs.size) { + return lhs.size < rhs.size; } else { - return lhs.second->Start() < rhs.second->Start(); + return lhs.free_by_start_entry->Start() < rhs.free_by_start_entry->Start(); } } }; @@ -114,7 +124,8 @@ class SwapAllocator<void> { explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} template <typename U> - SwapAllocator(const SwapAllocator<U>& other) : swap_space_(other.swap_space_) {} + SwapAllocator(const SwapAllocator<U>& other) // NOLINT, implicit + : swap_space_(other.swap_space_) {} SwapAllocator(const SwapAllocator& other) = default; SwapAllocator& operator=(const SwapAllocator& other) = default; @@ -149,7 +160,8 @@ class SwapAllocator { explicit SwapAllocator(SwapSpace* swap_space) : swap_space_(swap_space) {} template <typename U> - SwapAllocator(const SwapAllocator<U>& other) : swap_space_(other.swap_space_) {} + SwapAllocator(const SwapAllocator<U>& other) // NOLINT, implicit + : swap_space_(other.swap_space_) {} SwapAllocator(const SwapAllocator& other) = default; SwapAllocator& operator=(const SwapAllocator& other) = default; diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h index fb37804649..6921780a85 100644 --- a/compiler/utils/test_dex_file_builder.h +++ b/compiler/utils/test_dex_file_builder.h @@ -227,9 +227,18 @@ class TestDexFileBuilder { // Write the complete header again, just simpler that way. std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header)); + static constexpr bool kVerify = false; + static constexpr bool kVerifyChecksum = false; std::string error_msg; std::unique_ptr<const DexFile> dex_file(DexFile::Open( - &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, false, &error_msg)); + &dex_file_data_[0], + dex_file_data_.size(), + dex_location, + 0u, + nullptr, + kVerify, + kVerifyChecksum, + &error_msg)); CHECK(dex_file != nullptr) << error_msg; return dex_file; } diff --git a/compiler/utils/test_dex_file_builder_test.cc b/compiler/utils/test_dex_file_builder_test.cc index 7a424a23f5..c76739b3b1 100644 --- a/compiler/utils/test_dex_file_builder_test.cc +++ b/compiler/utils/test_dex_file_builder_test.cc @@ -49,7 +49,8 @@ TEST(TestDexFileBuilderTest, SimpleTest) { }; ASSERT_EQ(arraysize(expected_strings), dex_file->NumStringIds()); for (size_t i = 0; i != arraysize(expected_strings); ++i) { - EXPECT_STREQ(expected_strings[i], dex_file->GetStringData(dex_file->GetStringId(i))) << i; + EXPECT_STREQ(expected_strings[i], + dex_file->GetStringData(dex_file->GetStringId(dex::StringIndex(i)))) << i; } static const char* const expected_types[] = { @@ -62,18 +63,19 @@ TEST(TestDexFileBuilderTest, SimpleTest) { }; ASSERT_EQ(arraysize(expected_types), dex_file->NumTypeIds()); for (size_t i = 0; i != arraysize(expected_types); ++i) { - EXPECT_STREQ(expected_types[i], dex_file->GetTypeDescriptor(dex_file->GetTypeId(i))) << i; + EXPECT_STREQ(expected_types[i], + dex_file->GetTypeDescriptor(dex_file->GetTypeId(dex::TypeIndex(i)))) << i; } ASSERT_EQ(1u, dex_file->NumFieldIds()); - EXPECT_STREQ("[I TestClass.intField", PrettyField(0u, *dex_file).c_str()); + EXPECT_STREQ("[I TestClass.intField", dex_file->PrettyField(0u).c_str()); ASSERT_EQ(2u, dex_file->NumProtoIds()); ASSERT_EQ(2u, 
dex_file->NumMethodIds()); EXPECT_STREQ("TestClass TestClass.bar(java.lang.Object, java.lang.Object[])", - PrettyMethod(0u, *dex_file).c_str()); + dex_file->PrettyMethod(0u).c_str()); EXPECT_STREQ("int TestClass.foo()", - PrettyMethod(1u, *dex_file).c_str()); + dex_file->PrettyMethod(1u).c_str()); EXPECT_EQ(0u, builder.GetStringIdx("Arbitrary string")); EXPECT_EQ(2u, builder.GetTypeIdx("Ljava/lang/Class;")); diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h new file mode 100644 index 0000000000..a0fa1a4a63 --- /dev/null +++ b/compiler/utils/type_reference.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_ +#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_ + +#include <stdint.h> + +#include "base/logging.h" +#include "dex_file_types.h" +#include "string_reference.h" + +namespace art { + +class DexFile; + +// A type is located by its DexFile and the string_ids_ table index into that DexFile. +struct TypeReference { + TypeReference(const DexFile* file, dex::TypeIndex index) : dex_file(file), type_index(index) { } + + const DexFile* dex_file; + dex::TypeIndex type_index; +}; + +// Compare the actual referenced type names. Used for type reference deduplication. +struct TypeReferenceValueComparator { + bool operator()(TypeReference tr1, TypeReference tr2) const { + // Note that we want to deduplicate identical boot image types even if they are + // referenced by different dex files, so we simply compare the descriptors. 
+ StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_); + StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_); + return StringReferenceValueComparator()(sr1, sr2); + } +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_TYPE_REFERENCE_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 2203646e77..1736618363 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -350,6 +350,38 @@ void X86Assembler::movaps(XmmRegister dst, XmmRegister src) { } +void X86Assembler::movaps(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitOperand(dst, src); +} + + +void X86Assembler::movups(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x10); + EmitOperand(dst, src); +} + + +void X86Assembler::movaps(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x29); + EmitOperand(src, dst); +} + + +void X86Assembler::movups(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x11); + EmitOperand(src, dst); +} + + void X86Assembler::movss(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -467,6 +499,83 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) { } +void X86Assembler::addps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x58); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::subps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5C); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::mulps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x59); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::divps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5E); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::movapd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::movapd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitOperand(dst, src); +} + + +void X86Assembler::movupd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x10); + EmitOperand(dst, src); +} + + +void X86Assembler::movapd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x29); + EmitOperand(src, dst); +} + + +void X86Assembler::movupd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x11); + EmitOperand(src, dst); +} + + void X86Assembler::flds(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -533,39 +642,6 @@ 
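Editorial note, not part of the patch: for readers cross-checking the emitters added in this file (above and below) against the Intel manuals, the packed-single instructions (movaps, movups, addps, subps, mulps, divps) use only the 0x0F escape byte, the packed-double and integer SSE2 forms add the 0x66 operand-size prefix, and register-register operands are encoded in a single ModRM byte with mod=11. The snippet below is a tiny standalone check of that ModRM arithmetic, not ART code, and it assumes EmitXmmRegisterOperand(dst, src) encodes mod=11, reg=dst, rm=src in the usual way.

#include <cstdint>
#include <cstdio>

// ModRM for a register-register XMM operand: mod=11, reg=dst, rm=src.
static uint8_t XmmModRM(int dst, int src) {
  return static_cast<uint8_t>(0xC0 | (dst << 3) | src);
}

int main() {
  std::printf("addps  xmm1, xmm2: 0F 58 %02X\n", XmmModRM(1, 2));     // 0F 58 CA
  std::printf("movapd xmm0, xmm7: 66 0F 28 %02X\n", XmmModRM(0, 7));  // 66 0F 28 C7
  std::printf("paddd  xmm3, xmm5: 66 0F FE %02X\n", XmmModRM(3, 5));  // 66 0F FE DD
  return 0;
}

The shift-by-immediate emitters added further down (psllw, psrlw, psraw and friends) reuse a single opcode byte (0x71, 0x72 or 0x73) and select the operation through that same reg field, which is why they pass a small integer (6, 2, 4 or 3) to EmitXmmRegisterOperand instead of a destination register.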
void X86Assembler::movhpd(const Address& dst, XmmRegister src) { } -void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { - DCHECK(shift_count.is_uint8()); - - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x73); - EmitXmmRegisterOperand(3, reg); - EmitUint8(shift_count.value()); -} - - -void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { - DCHECK(shift_count.is_uint8()); - - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x73); - EmitXmmRegisterOperand(2, reg); - EmitUint8(shift_count.value()); -} - - -void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x62); - EmitXmmRegisterOperand(dst, src); -} - - void X86Assembler::addsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -638,6 +714,178 @@ void X86Assembler::divsd(XmmRegister dst, const Address& src) { } +void X86Assembler::addpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x58); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::subpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5C); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::mulpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x59); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::divpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5E); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::movdqa(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitOperand(dst, src); +} + + +void X86Assembler::movdqu(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitOperand(dst, src); +} + + +void X86Assembler::movdqa(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x7F); + EmitOperand(src, dst); +} + + +void X86Assembler::movdqu(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0x7F); + EmitOperand(src, dst); +} + + +void X86Assembler::paddb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xF8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + 
EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFD); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xF9); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pmullw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD5); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFE); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFA); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pmulld(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x40); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD4); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFB); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -710,6 +958,14 @@ void X86Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) { } +void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5B); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -727,6 +983,14 @@ void X86Assembler::comiss(XmmRegister a, XmmRegister b) { } +void X86Assembler::comiss(XmmRegister a, const Address& b) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x2F); + EmitOperand(a, b); +} + + void X86Assembler::comisd(XmmRegister a, XmmRegister b) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -736,6 +1000,15 @@ void X86Assembler::comisd(XmmRegister a, XmmRegister b) { } +void X86Assembler::comisd(XmmRegister a, const Address& b) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x2F); + EmitOperand(a, b); +} + + void X86Assembler::ucomiss(XmmRegister a, XmmRegister b) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -828,10 +1101,27 @@ void X86Assembler::xorpd(XmmRegister dst, XmmRegister src) { } -void X86Assembler::andps(XmmRegister dst, XmmRegister src) { +void X86Assembler::xorps(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); - EmitUint8(0x54); + EmitUint8(0x57); + EmitOperand(dst, src); +} + + +void X86Assembler::xorps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x57); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pxor(XmmRegister 
dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEF); EmitXmmRegisterOperand(dst, src); } @@ -845,23 +1135,75 @@ void X86Assembler::andpd(XmmRegister dst, XmmRegister src) { } -void X86Assembler::orpd(XmmRegister dst, XmmRegister src) { +void X86Assembler::andpd(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); EmitUint8(0x0F); - EmitUint8(0x56); + EmitUint8(0x54); + EmitOperand(dst, src); +} + + +void X86Assembler::andps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x54); EmitXmmRegisterOperand(dst, src); } -void X86Assembler::xorps(XmmRegister dst, const Address& src) { +void X86Assembler::andps(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); - EmitUint8(0x57); + EmitUint8(0x54); EmitOperand(dst, src); } +void X86Assembler::pand(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDB); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::andnps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pandn(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDF); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::orpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x56); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::orps(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -870,28 +1212,268 @@ void X86Assembler::orps(XmmRegister dst, XmmRegister src) { } -void X86Assembler::xorps(XmmRegister dst, XmmRegister src) { +void X86Assembler::por(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); EmitUint8(0x0F); - EmitUint8(0x57); + EmitUint8(0xEB); EmitXmmRegisterOperand(dst, src); } -void X86Assembler::andps(XmmRegister dst, const Address& src) { +void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); EmitUint8(0x0F); - EmitUint8(0x54); - EmitOperand(dst, src); + EmitUint8(0xE0); + EmitXmmRegisterOperand(dst, src); } -void X86Assembler::andpd(XmmRegister dst, const Address& src) { +void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); EmitUint8(0x0F); - EmitUint8(0x54); - EmitOperand(dst, src); + EmitUint8(0xE3); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x74); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + 
EmitUint8(0x75); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x76); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x29); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x64); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x65); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x66); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x37); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xC6); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC6); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x70); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::punpcklbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x60); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpcklwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x61); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6C); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::pslld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + 
EmitUint8(0x72); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psllq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psraw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrad(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); } @@ -1030,6 +1612,14 @@ void X86Assembler::xchgl(Register reg, const Address& address) { } +void X86Assembler::cmpb(const Address& address, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x80); + EmitOperand(7, address); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::cmpw(const Address& address, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1123,6 +1713,23 @@ void X86Assembler::testl(Register reg, const Immediate& immediate) { } +void X86Assembler::testb(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF6); + EmitOperand(EAX, dst); + CHECK(imm.is_int8()); + EmitUint8(imm.value() & 0xFF); +} + + +void X86Assembler::testl(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF7); + EmitOperand(0, dst); + EmitImmediate(imm); +} + + void X86Assembler::andl(Register dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x23); @@ -1666,6 +2273,13 @@ void X86Assembler::jmp(NearLabel* label) { } +void X86Assembler::repne_scasb() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF2); + EmitUint8(0xAE); +} + + void X86Assembler::repne_scasw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1674,6 +2288,13 
@@ void X86Assembler::repne_scasw() { } +void X86Assembler::repe_cmpsb() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF2); + EmitUint8(0xA6); +} + + void X86Assembler::repe_cmpsw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1689,6 +2310,13 @@ void X86Assembler::repe_cmpsl() { } +void X86Assembler::rep_movsb() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0xA4); +} + + void X86Assembler::rep_movsw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1918,489 +2546,6 @@ void X86Assembler::EmitGenericShift(int reg_or_opcode, EmitOperand(reg_or_opcode, operand); } -static dwarf::Reg DWARFReg(Register reg) { - return dwarf::Reg::X86Core(static_cast<int>(reg)); -} - -constexpr size_t kFramePointerSize = 4; - -void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& spill_regs, - const ManagedRegisterEntrySpills& entry_spills) { - DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. - cfi_.SetCurrentCFAOffset(4); // Return address on stack. - CHECK_ALIGNED(frame_size, kStackAlignment); - int gpr_count = 0; - for (int i = spill_regs.size() - 1; i >= 0; --i) { - Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); - pushl(spill); - gpr_count++; - cfi_.AdjustCFAOffset(kFramePointerSize); - cfi_.RelOffset(DWARFReg(spill), 0); - } - - // return address then method on stack. - int32_t adjust = frame_size - gpr_count * kFramePointerSize - - kFramePointerSize /*method*/ - - kFramePointerSize /*return address*/; - addl(ESP, Immediate(-adjust)); - cfi_.AdjustCFAOffset(adjust); - pushl(method_reg.AsX86().AsCpuRegister()); - cfi_.AdjustCFAOffset(kFramePointerSize); - DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size); - - for (size_t i = 0; i < entry_spills.size(); ++i) { - ManagedRegisterSpill spill = entry_spills.at(i); - if (spill.AsX86().IsCpuRegister()) { - int offset = frame_size + spill.getSpillOffset(); - movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); - } else { - DCHECK(spill.AsX86().IsXmmRegister()); - if (spill.getSize() == 8) { - movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); - } else { - CHECK_EQ(spill.getSize(), 4); - movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); - } - } - } -} - -void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { - CHECK_ALIGNED(frame_size, kStackAlignment); - cfi_.RememberState(); - // -kFramePointerSize for ArtMethod*. - int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize; - addl(ESP, Immediate(adjust)); - cfi_.AdjustCFAOffset(-adjust); - for (size_t i = 0; i < spill_regs.size(); ++i) { - Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); - popl(spill); - cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); - cfi_.Restore(DWARFReg(spill)); - } - ret(); - // The CFI should be restored for any code that follows the exit block. 
- cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - -void X86Assembler::IncreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - addl(ESP, Immediate(-adjust)); - cfi_.AdjustCFAOffset(adjust); -} - -void X86Assembler::DecreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - addl(ESP, Immediate(adjust)); - cfi_.AdjustCFAOffset(-adjust); -} - -void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { - X86ManagedRegister src = msrc.AsX86(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsCpuRegister()) { - CHECK_EQ(4u, size); - movl(Address(ESP, offs), src.AsCpuRegister()); - } else if (src.IsRegisterPair()) { - CHECK_EQ(8u, size); - movl(Address(ESP, offs), src.AsRegisterPairLow()); - movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), - src.AsRegisterPairHigh()); - } else if (src.IsX87Register()) { - if (size == 4) { - fstps(Address(ESP, offs)); - } else { - fstpl(Address(ESP, offs)); - } - } else { - CHECK(src.IsXmmRegister()); - if (size == 4) { - movss(Address(ESP, offs), src.AsXmmRegister()); - } else { - movsd(Address(ESP, offs), src.AsXmmRegister()); - } - } -} - -void X86Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - X86ManagedRegister src = msrc.AsX86(); - CHECK(src.IsCpuRegister()); - movl(Address(ESP, dest), src.AsCpuRegister()); -} - -void X86Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { - X86ManagedRegister src = msrc.AsX86(); - CHECK(src.IsCpuRegister()); - movl(Address(ESP, dest), src.AsCpuRegister()); -} - -void X86Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister) { - movl(Address(ESP, dest), Immediate(imm)); -} - -void X86Assembler::StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, - ManagedRegister) { - fs()->movl(Address::Absolute(dest), Immediate(imm)); -} - -void X86Assembler::StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - leal(scratch.AsCpuRegister(), Address(ESP, fr_offs)); - fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister()); -} - -void X86Assembler::StoreStackPointerToThread32(ThreadOffset<4> thr_offs) { - fs()->movl(Address::Absolute(thr_offs), ESP); -} - -void X86Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/, - FrameOffset /*in_off*/, ManagedRegister /*scratch*/) { - UNIMPLEMENTED(FATAL); // this case only currently exists for ARM -} - -void X86Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { - X86ManagedRegister dest = mdest.AsX86(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (dest.IsCpuRegister()) { - CHECK_EQ(4u, size); - movl(dest.AsCpuRegister(), Address(ESP, src)); - } else if (dest.IsRegisterPair()) { - CHECK_EQ(8u, size); - movl(dest.AsRegisterPairLow(), Address(ESP, src)); - movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4))); - } else if (dest.IsX87Register()) { - if (size == 4) { - flds(Address(ESP, src)); - } else { - fldl(Address(ESP, src)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - movss(dest.AsXmmRegister(), Address(ESP, src)); - } else { - movsd(dest.AsXmmRegister(), Address(ESP, src)); - } - } -} - -void X86Assembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) { - X86ManagedRegister dest = mdest.AsX86(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if 
(dest.IsCpuRegister()) { - CHECK_EQ(4u, size); - fs()->movl(dest.AsCpuRegister(), Address::Absolute(src)); - } else if (dest.IsRegisterPair()) { - CHECK_EQ(8u, size); - fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src)); - fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset<4>(src.Int32Value()+4))); - } else if (dest.IsX87Register()) { - if (size == 4) { - fs()->flds(Address::Absolute(src)); - } else { - fs()->fldl(Address::Absolute(src)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - fs()->movss(dest.AsXmmRegister(), Address::Absolute(src)); - } else { - fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src)); - } - } -} - -void X86Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister()); - movl(dest.AsCpuRegister(), Address(ESP, src)); -} - -void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); - if (unpoison_reference) { - MaybeUnpoisonHeapReference(dest.AsCpuRegister()); - } -} - -void X86Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); -} - -void X86Assembler::LoadRawPtrFromThread32(ManagedRegister mdest, - ThreadOffset<4> offs) { - X86ManagedRegister dest = mdest.AsX86(); - CHECK(dest.IsCpuRegister()); - fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs)); -} - -void X86Assembler::SignExtend(ManagedRegister mreg, size_t size) { - X86ManagedRegister reg = mreg.AsX86(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsCpuRegister()) << reg; - if (size == 1) { - movsxb(reg.AsCpuRegister(), reg.AsByteRegister()); - } else { - movsxw(reg.AsCpuRegister(), reg.AsCpuRegister()); - } -} - -void X86Assembler::ZeroExtend(ManagedRegister mreg, size_t size) { - X86ManagedRegister reg = mreg.AsX86(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsCpuRegister()) << reg; - if (size == 1) { - movzxb(reg.AsCpuRegister(), reg.AsByteRegister()); - } else { - movzxw(reg.AsCpuRegister(), reg.AsCpuRegister()); - } -} - -void X86Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { - X86ManagedRegister dest = mdest.AsX86(); - X86ManagedRegister src = msrc.AsX86(); - if (!dest.Equals(src)) { - if (dest.IsCpuRegister() && src.IsCpuRegister()) { - movl(dest.AsCpuRegister(), src.AsCpuRegister()); - } else if (src.IsX87Register() && dest.IsXmmRegister()) { - // Pass via stack and pop X87 register - subl(ESP, Immediate(16)); - if (size == 4) { - CHECK_EQ(src.AsX87Register(), ST0); - fstps(Address(ESP, 0)); - movss(dest.AsXmmRegister(), Address(ESP, 0)); - } else { - CHECK_EQ(src.AsX87Register(), ST0); - fstpl(Address(ESP, 0)); - movsd(dest.AsXmmRegister(), Address(ESP, 0)); - } - addl(ESP, Immediate(16)); - } else { - // TODO: x87, SSE - UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src; - } - } -} - -void X86Assembler::CopyRef(FrameOffset dest, FrameOffset src, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - movl(scratch.AsCpuRegister(), Address(ESP, src)); - movl(Address(ESP, dest), scratch.AsCpuRegister()); -} - -void 
X86Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs, - ThreadOffset<4> thr_offs, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs)); - Store(fr_offs, scratch, 4); -} - -void X86Assembler::CopyRawPtrToThread32(ThreadOffset<4> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - Load(scratch, fr_offs, 4); - fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister()); -} - -void X86Assembler::Copy(FrameOffset dest, FrameOffset src, - ManagedRegister mscratch, - size_t size) { - X86ManagedRegister scratch = mscratch.AsX86(); - if (scratch.IsCpuRegister() && size == 8) { - Load(scratch, src, 4); - Store(dest, scratch, 4); - Load(scratch, FrameOffset(src.Int32Value() + 4), 4); - Store(FrameOffset(dest.Int32Value() + 4), scratch, 4); - } else { - Load(scratch, src, size); - Store(dest, scratch, size); - } -} - -void X86Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void X86Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister scratch, size_t size) { - CHECK(scratch.IsNoRegister()); - CHECK_EQ(size, 4u); - pushl(Address(ESP, src)); - popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset)); -} - -void X86Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsX86().AsCpuRegister(); - CHECK_EQ(size, 4u); - movl(scratch, Address(ESP, src_base)); - movl(scratch, Address(scratch, src_offset)); - movl(Address(ESP, dest), scratch); -} - -void X86Assembler::Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) { - CHECK_EQ(size, 4u); - CHECK(scratch.IsNoRegister()); - pushl(Address(src.AsX86().AsCpuRegister(), src_offset)); - popl(Address(dest.AsX86().AsCpuRegister(), dest_offset)); -} - -void X86Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister mscratch, size_t size) { - Register scratch = mscratch.AsX86().AsCpuRegister(); - CHECK_EQ(size, 4u); - CHECK_EQ(dest.Int32Value(), src.Int32Value()); - movl(scratch, Address(ESP, src)); - pushl(Address(scratch, src_offset)); - popl(Address(scratch, dest_offset)); -} - -void X86Assembler::MemoryBarrier(ManagedRegister) { - mfence(); -} - -void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, - FrameOffset handle_scope_offset, - ManagedRegister min_reg, bool null_allowed) { - X86ManagedRegister out_reg = mout_reg.AsX86(); - X86ManagedRegister in_reg = min_reg.AsX86(); - CHECK(in_reg.IsCpuRegister()); - CHECK(out_reg.IsCpuRegister()); - VerifyObject(in_reg, null_allowed); - if (null_allowed) { - Label null_arg; - if (!out_reg.Equals(in_reg)) { - xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); - } - testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); - j(kZero, &null_arg); - leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset)); - Bind(&null_arg); - } else { - leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset)); - } -} - -void X86Assembler::CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handle_scope_offset, - ManagedRegister mscratch, - bool null_allowed) { - X86ManagedRegister scratch 
= mscratch.AsX86(); - CHECK(scratch.IsCpuRegister()); - if (null_allowed) { - Label null_arg; - movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); - testl(scratch.AsCpuRegister(), scratch.AsCpuRegister()); - j(kZero, &null_arg); - leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); - Bind(&null_arg); - } else { - leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); - } - Store(out_off, scratch, 4); -} - -// Given a handle scope entry, load the associated reference. -void X86Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, - ManagedRegister min_reg) { - X86ManagedRegister out_reg = mout_reg.AsX86(); - X86ManagedRegister in_reg = min_reg.AsX86(); - CHECK(out_reg.IsCpuRegister()); - CHECK(in_reg.IsCpuRegister()); - Label null_arg; - if (!out_reg.Equals(in_reg)) { - xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); - } - testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); - j(kZero, &null_arg); - movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0)); - Bind(&null_arg); -} - -void X86Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references -} - -void X86Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references -} - -void X86Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) { - X86ManagedRegister base = mbase.AsX86(); - CHECK(base.IsCpuRegister()); - call(Address(base.AsCpuRegister(), offset.Int32Value())); - // TODO: place reference map on call -} - -void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { - Register scratch = mscratch.AsX86().AsCpuRegister(); - movl(scratch, Address(ESP, base)); - call(Address(scratch, offset)); -} - -void X86Assembler::CallFromThread32(ThreadOffset<4> offset, ManagedRegister /*mscratch*/) { - fs()->call(Address::Absolute(offset)); -} - -void X86Assembler::GetCurrentThread(ManagedRegister tr) { - fs()->movl(tr.AsX86().AsCpuRegister(), - Address::Absolute(Thread::SelfOffset<4>())); -} - -void X86Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister mscratch) { - X86ManagedRegister scratch = mscratch.AsX86(); - fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<4>())); - movl(Address(ESP, offset), scratch.AsCpuRegister()); -} - -void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) { - X86ExceptionSlowPath* slow = new (GetArena()) X86ExceptionSlowPath(stack_adjust); - buffer_.EnqueueSlowPath(slow); - fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<4>()), Immediate(0)); - j(kNotEqual, slow->Entry()); -} - -void X86ExceptionSlowPath::Emit(Assembler *sasm) { - X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - // Note: the return value is dead - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); - } - // Pass exception as argument in EAX - __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<4>())); - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException))); - // this call should never return - __ int3(); -#undef __ -} - void X86Assembler::AddConstantArea() { ArrayRef<const int32_t> area = constant_area_.GetBuffer(); // Generate the data for the literal area. 
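Editorial note, not part of the patch: the large deletion above removes the JNI-frame helpers (BuildFrame, RemoveFrame, the Store/Load/Copy family, ExceptionPoll, and so on) from X86Assembler; this diff does not show where they land, presumably a dedicated JNI macro assembler. For reference, the stack adjustment the deleted BuildFrame computed is simply frame_size minus what is already on the stack, or about to be pushed, when the addl runs. A standalone arithmetic check with made-up example values:

#include <cstddef>
#include <cstdio>

int main() {
  constexpr size_t kFramePointerSize = 4;  // x86-32, as in the deleted code.
  constexpr size_t frame_size = 64;        // Example only; must satisfy kStackAlignment.
  constexpr size_t gpr_count = 2;          // E.g. two callee-save registers pushed.
  // The return address and the spills are already on the stack; the ArtMethod* slot is
  // pushed right after the addl, so all three are subtracted from the requested frame size.
  constexpr size_t adjust = frame_size
      - gpr_count * kFramePointerSize
      - kFramePointerSize   /* ArtMethod* */
      - kFramePointerSize;  /* return address */
  std::printf("addl $-%zu, %%esp  // 64 - 8 - 4 - 4 = %zu\n", adjust, adjust);
  return 0;
}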
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 8567ad2a17..a747cda7bd 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -20,13 +20,14 @@ #include <vector> #include "base/arena_containers.h" +#include "base/array_ref.h" #include "base/bit_utils.h" +#include "base/enums.h" #include "base/macros.h" #include "constants_x86.h" #include "globals.h" #include "managed_register_x86.h" #include "offsets.h" -#include "utils/array_ref.h" #include "utils/assembler.h" namespace art { @@ -195,7 +196,7 @@ class Address : public Operand { return result; } - static Address Absolute(ThreadOffset<4> addr) { + static Address Absolute(ThreadOffset32 addr) { return Absolute(addr.Int32Value()); } @@ -370,7 +371,12 @@ class X86Assembler FINAL : public Assembler { void setb(Condition condition, Register dst); - void movaps(XmmRegister dst, XmmRegister src); + void movaps(XmmRegister dst, XmmRegister src); // move + void movaps(XmmRegister dst, const Address& src); // load aligned + void movups(XmmRegister dst, const Address& src); // load unaligned + void movaps(const Address& dst, XmmRegister src); // store aligned + void movups(const Address& dst, XmmRegister src); // store unaligned + void movss(XmmRegister dst, const Address& src); void movss(const Address& dst, XmmRegister src); void movss(XmmRegister dst, XmmRegister src); @@ -387,18 +393,24 @@ class X86Assembler FINAL : public Assembler { void divss(XmmRegister dst, XmmRegister src); void divss(XmmRegister dst, const Address& src); + void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void subps(XmmRegister dst, XmmRegister src); + void mulps(XmmRegister dst, XmmRegister src); + void divps(XmmRegister dst, XmmRegister src); + + void movapd(XmmRegister dst, XmmRegister src); // move + void movapd(XmmRegister dst, const Address& src); // load aligned + void movupd(XmmRegister dst, const Address& src); // load unaligned + void movapd(const Address& dst, XmmRegister src); // store aligned + void movupd(const Address& dst, XmmRegister src); // store unaligned + void movsd(XmmRegister dst, const Address& src); void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); - void psrlq(XmmRegister reg, const Immediate& shift_count); - void punpckldq(XmmRegister dst, XmmRegister src); - void movhpd(XmmRegister dst, const Address& src); void movhpd(const Address& dst, XmmRegister src); - void psrldq(XmmRegister reg, const Immediate& shift_count); - void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); @@ -408,6 +420,31 @@ class X86Assembler FINAL : public Assembler { void divsd(XmmRegister dst, XmmRegister src); void divsd(XmmRegister dst, const Address& src); + void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void subpd(XmmRegister dst, XmmRegister src); + void mulpd(XmmRegister dst, XmmRegister src); + void divpd(XmmRegister dst, XmmRegister src); + + void movdqa(XmmRegister dst, XmmRegister src); // move + void movdqa(XmmRegister dst, const Address& src); // load aligned + void movdqu(XmmRegister dst, const Address& src); // load unaligned + void movdqa(const Address& dst, XmmRegister src); // store aligned + void movdqu(const Address& dst, XmmRegister src); // store unaligned + + void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void psubb(XmmRegister dst, XmmRegister src); + + void 
paddw(XmmRegister dst, XmmRegister src); + void psubw(XmmRegister dst, XmmRegister src); + void pmullw(XmmRegister dst, XmmRegister src); + + void paddd(XmmRegister dst, XmmRegister src); + void psubd(XmmRegister dst, XmmRegister src); + void pmulld(XmmRegister dst, XmmRegister src); + + void paddq(XmmRegister dst, XmmRegister src); + void psubq(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, Register src); void cvtsi2sd(XmmRegister dst, Register src); @@ -420,10 +457,13 @@ class X86Assembler FINAL : public Assembler { void cvttss2si(Register dst, XmmRegister src); void cvttsd2si(Register dst, XmmRegister src); + void cvtdq2ps(XmmRegister dst, XmmRegister src); void cvtdq2pd(XmmRegister dst, XmmRegister src); void comiss(XmmRegister a, XmmRegister b); + void comiss(XmmRegister a, const Address& b); void comisd(XmmRegister a, XmmRegister b); + void comisd(XmmRegister a, const Address& b); void ucomiss(XmmRegister a, XmmRegister b); void ucomiss(XmmRegister a, const Address& b); void ucomisd(XmmRegister a, XmmRegister b); @@ -439,14 +479,56 @@ class X86Assembler FINAL : public Assembler { void xorpd(XmmRegister dst, XmmRegister src); void xorps(XmmRegister dst, const Address& src); void xorps(XmmRegister dst, XmmRegister src); + void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andpd(XmmRegister dst, XmmRegister src); void andpd(XmmRegister dst, const Address& src); void andps(XmmRegister dst, XmmRegister src); void andps(XmmRegister dst, const Address& src); + void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now) - void orpd(XmmRegister dst, XmmRegister src); + void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andnps(XmmRegister dst, XmmRegister src); + void pandn(XmmRegister dst, XmmRegister src); + + void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void orps(XmmRegister dst, XmmRegister src); + void por(XmmRegister dst, XmmRegister src); + + void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pavgw(XmmRegister dst, XmmRegister src); + + void pcmpeqb(XmmRegister dst, XmmRegister src); + void pcmpeqw(XmmRegister dst, XmmRegister src); + void pcmpeqd(XmmRegister dst, XmmRegister src); + void pcmpeqq(XmmRegister dst, XmmRegister src); + + void pcmpgtb(XmmRegister dst, XmmRegister src); + void pcmpgtw(XmmRegister dst, XmmRegister src); + void pcmpgtd(XmmRegister dst, XmmRegister src); + void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2 + + void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); + void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); + + void punpcklbw(XmmRegister dst, XmmRegister src); + void punpcklwd(XmmRegister dst, XmmRegister src); + void punpckldq(XmmRegister dst, XmmRegister src); + void punpcklqdq(XmmRegister dst, XmmRegister src); + + void psllw(XmmRegister reg, const Immediate& shift_count); + void pslld(XmmRegister reg, const Immediate& shift_count); + void psllq(XmmRegister reg, const Immediate& shift_count); + + void psraw(XmmRegister reg, const Immediate& shift_count); + void psrad(XmmRegister reg, const Immediate& shift_count); + // no psraq + + void psrlw(XmmRegister reg, const Immediate& shift_count); + void psrld(XmmRegister reg, const Immediate& shift_count); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void psrldq(XmmRegister reg, const Immediate& shift_count); void flds(const 
Address& src); void fstps(const Address& dst); @@ -479,6 +561,7 @@ class X86Assembler FINAL : public Assembler { void xchgl(Register dst, Register src); void xchgl(Register reg, const Address& address); + void cmpb(const Address& address, const Immediate& imm); void cmpw(const Address& address, const Immediate& imm); void cmpl(Register reg, const Immediate& imm); @@ -492,6 +575,9 @@ class X86Assembler FINAL : public Assembler { void testl(Register reg, const Immediate& imm); void testl(Register reg1, const Address& address); + void testb(const Address& dst, const Immediate& imm); + void testl(const Address& dst, const Immediate& imm); + void andl(Register dst, const Immediate& imm); void andl(Register dst, Register src); void andl(Register dst, const Address& address); @@ -585,9 +671,12 @@ class X86Assembler FINAL : public Assembler { void jmp(Label* label); void jmp(NearLabel* label); + void repne_scasb(); void repne_scasw(); + void repe_cmpsb(); void repe_cmpsw(); void repe_cmpsl(); + void rep_movsb(); void rep_movsw(); X86Assembler* lock(); @@ -628,123 +717,6 @@ class X86Assembler FINAL : public Assembler { void Bind(NearLabel* label); // - // Overridden common assembler high-level functionality - // - - // Emit code that will create an activation on the stack - void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack - void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - - void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch) - OVERRIDE; - - void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - - void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - - void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE; - - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - - void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE; - - // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; - - void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs, - ManagedRegister scratch) OVERRIDE; - - void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - - void 
Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void MemoryBarrier(ManagedRegister) OVERRIDE; - - // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current() - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE; - - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - - // // Heap poisoning. // @@ -752,6 +724,12 @@ class X86Assembler FINAL : public Assembler { void PoisonHeapReference(Register reg) { negl(reg); } // Unpoison a heap reference contained in `reg`. void UnpoisonHeapReference(Register reg) { negl(reg); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
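// (Editorial sketch, not part of this patch: with kPoisonHeapReferences enabled,
// references are stored in the heap in negated form, so poisoning before a store
// and unpoisoning after a load are each a single negl, as the helpers here show.
// In plain C++ terms, with illustrative names that are not ART functions:
//
//   uint32_t PoisonRef(uint32_t ref)   { return 0u - ref; }  // what negl computes
//   uint32_t UnpoisonRef(uint32_t ref) { return 0u - ref; }  // the same op undoes it
//
// so UnpoisonRef(PoisonRef(x)) == x for every 32-bit value x.)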
void MaybeUnpoisonHeapReference(Register reg) { if (kPoisonHeapReferences) { @@ -841,15 +819,6 @@ inline void X86Assembler::EmitOperandSizeOverride() { EmitUint8(0x66); } -// Slowpath entered when Thread::Current()->_exception is non-null -class X86ExceptionSlowPath FINAL : public SlowPath { - public: - explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const size_t stack_adjust_; -}; - } // namespace x86 } // namespace art diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 1d1df6e447..f75f972265 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -122,18 +122,6 @@ TEST_F(AssemblerX86Test, Movntl) { DriverStr(expected, "movntl"); } -TEST_F(AssemblerX86Test, psrlq) { - GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); - const char* expected = "psrlq $0x20, %xmm0\n"; - DriverStr(expected, "psrlq"); -} - -TEST_F(AssemblerX86Test, punpckldq) { - GetAssembler()->punpckldq(x86::XMM0, x86::XMM1); - const char* expected = "punpckldq %xmm1, %xmm0\n"; - DriverStr(expected, "punpckldq"); -} - TEST_F(AssemblerX86Test, LoadLongConstant) { GetAssembler()->LoadLongConstant(x86::XMM0, 51); const char* expected = @@ -207,12 +195,24 @@ TEST_F(AssemblerX86Test, FPUIntegerStore) { DriverStr(expected, "FPUIntegerStore"); } +TEST_F(AssemblerX86Test, Repnescasb) { + GetAssembler()->repne_scasb(); + const char* expected = "repne scasb\n"; + DriverStr(expected, "Repnescasb"); +} + TEST_F(AssemblerX86Test, Repnescasw) { GetAssembler()->repne_scasw(); const char* expected = "repne scasw\n"; DriverStr(expected, "Repnescasw"); } +TEST_F(AssemblerX86Test, Repecmpsb) { + GetAssembler()->repe_cmpsb(); + const char* expected = "repe cmpsb\n"; + DriverStr(expected, "Repecmpsb"); +} + TEST_F(AssemblerX86Test, Repecmpsw) { GetAssembler()->repe_cmpsw(); const char* expected = "repe cmpsw\n"; @@ -225,10 +225,10 @@ TEST_F(AssemblerX86Test, Repecmpsl) { DriverStr(expected, "Repecmpsl"); } -TEST_F(AssemblerX86Test, RepneScasw) { - GetAssembler()->repne_scasw(); - const char* expected = "repne scasw\n"; - DriverStr(expected, "repne_scasw"); +TEST_F(AssemblerX86Test, RepMovsb) { + GetAssembler()->rep_movsb(); + const char* expected = "rep movsb\n"; + DriverStr(expected, "rep_movsb"); } TEST_F(AssemblerX86Test, RepMovsw) { @@ -322,18 +322,51 @@ TEST_F(AssemblerX86Test, RollImm) { DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli"); } +TEST_F(AssemblerX86Test, Cvtdq2ps) { + DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps"); +} + +TEST_F(AssemblerX86Test, Cvtdq2pd) { + DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd"); +} + +TEST_F(AssemblerX86Test, ComissAddr) { + GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0)); + const char* expected = "comiss 0(%EAX), %xmm0\n"; + DriverStr(expected, "comiss"); +} + TEST_F(AssemblerX86Test, UComissAddr) { GetAssembler()->ucomiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0)); const char* expected = "ucomiss 0(%EAX), %xmm0\n"; DriverStr(expected, "ucomiss"); } +TEST_F(AssemblerX86Test, ComisdAddr) { + GetAssembler()->comisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0)); + const char* expected = "comisd 0(%EAX), %xmm0\n"; + DriverStr(expected, "comisd"); +} + TEST_F(AssemblerX86Test, UComisdAddr) { 
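// A note on the pattern used throughout this file: each test emits instructions
// through GetAssembler(), and DriverStr() then has the test harness assemble the
// expected AT&T-syntax string and compare the resulting bytes against what the
// ART assembler produced. The Repeat* helpers (RepeatFF, RepeatFFI, RepeatRI, ...)
// expand one template over every register/immediate combination, so a typical
// register-register SSE test is a one-liner, e.g. for an instruction such as mulss:
//
//   TEST_F(AssemblerX86Test, MulSS) {
//     DriverStr(RepeatFF(&x86::X86Assembler::mulss, "mulss %{reg2}, %{reg1}"), "mulss");
//   }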
GetAssembler()->ucomisd(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0)); const char* expected = "ucomisd 0(%EAX), %xmm0\n"; DriverStr(expected, "ucomisd"); } +TEST_F(AssemblerX86Test, RoundSS) { + GetAssembler()->roundss( + x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1)); + const char* expected = "roundss $1, %xmm1, %xmm0\n"; + DriverStr(expected, "roundss"); +} + +TEST_F(AssemblerX86Test, RoundSD) { + GetAssembler()->roundsd( + x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1), x86::Immediate(1)); + const char* expected = "roundsd $1, %xmm1, %xmm0\n"; + DriverStr(expected, "roundsd"); +} TEST_F(AssemblerX86Test, CmovlAddress) { GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EAX), x86::Address( @@ -350,6 +383,341 @@ TEST_F(AssemblerX86Test, CmovlAddress) { DriverStr(expected, "cmovl_address"); } +TEST_F(AssemblerX86Test, TestbAddressImmediate) { + GetAssembler()->testb( + x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Immediate(1)); + GetAssembler()->testb( + x86::Address(x86::Register(x86::ESP), FrameOffset(7)), + x86::Immediate(-128)); + GetAssembler()->testb( + x86::Address(x86::Register(x86::EBX), MemberOffset(130)), + x86::Immediate(127)); + const char* expected = + "testb $1, 0xc(%EDI,%EBX,4)\n" + "testb $-128, 0x7(%ESP)\n" + "testb $127, 0x82(%EBX)\n"; + + DriverStr(expected, "TestbAddressImmediate"); +} + +TEST_F(AssemblerX86Test, TestlAddressImmediate) { + GetAssembler()->testl( + x86::Address(x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Immediate(1)); + GetAssembler()->testl( + x86::Address(x86::Register(x86::ESP), FrameOffset(7)), + x86::Immediate(-100000)); + GetAssembler()->testl( + x86::Address(x86::Register(x86::EBX), MemberOffset(130)), + x86::Immediate(77777777)); + const char* expected = + "testl $1, 0xc(%EDI,%EBX,4)\n" + "testl $-100000, 0x7(%ESP)\n" + "testl $77777777, 0x82(%EBX)\n"; + + DriverStr(expected, "TestlAddressImmediate"); +} + +TEST_F(AssemblerX86Test, Movaps) { + DriverStr(RepeatFF(&x86::X86Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps"); +} + +TEST_F(AssemblerX86Test, MovapsAddr) { + GetAssembler()->movaps(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + GetAssembler()->movaps(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movaps 0x4(%ESP), %xmm0\n" + "movaps %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movaps_address"); +} + +TEST_F(AssemblerX86Test, MovupsAddr) { + GetAssembler()->movups(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + GetAssembler()->movups(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movups 0x4(%ESP), %xmm0\n" + "movups %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movups_address"); +} + +TEST_F(AssemblerX86Test, Movapd) { + DriverStr(RepeatFF(&x86::X86Assembler::movapd, "movapd %{reg2}, %{reg1}"), "movapd"); +} + +TEST_F(AssemblerX86Test, MovapdAddr) { + GetAssembler()->movapd(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + GetAssembler()->movapd(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movapd 0x4(%ESP), %xmm0\n" + "movapd %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movapd_address"); +} + +TEST_F(AssemblerX86Test, MovupdAddr) { + GetAssembler()->movupd(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + 
GetAssembler()->movupd(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movupd 0x4(%ESP), %xmm0\n" + "movupd %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movupd_address"); +} + +TEST_F(AssemblerX86Test, Movdqa) { + DriverStr(RepeatFF(&x86::X86Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movdqa"); +} + +TEST_F(AssemblerX86Test, MovdqaAddr) { + GetAssembler()->movdqa(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + GetAssembler()->movdqa(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movdqa 0x4(%ESP), %xmm0\n" + "movdqa %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movdqa_address"); +} + +TEST_F(AssemblerX86Test, MovdquAddr) { + GetAssembler()->movdqu(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4)); + GetAssembler()->movdqu(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1)); + const char* expected = + "movdqu 0x4(%ESP), %xmm0\n" + "movdqu %xmm1, 0x2(%ESP)\n"; + DriverStr(expected, "movdqu_address"); +} + +TEST_F(AssemblerX86Test, AddPS) { + DriverStr(RepeatFF(&x86::X86Assembler::addps, "addps %{reg2}, %{reg1}"), "addps"); +} + +TEST_F(AssemblerX86Test, AddPD) { + DriverStr(RepeatFF(&x86::X86Assembler::addpd, "addpd %{reg2}, %{reg1}"), "addpd"); +} + +TEST_F(AssemblerX86Test, SubPS) { + DriverStr(RepeatFF(&x86::X86Assembler::subps, "subps %{reg2}, %{reg1}"), "subps"); +} + +TEST_F(AssemblerX86Test, SubPD) { + DriverStr(RepeatFF(&x86::X86Assembler::subpd, "subpd %{reg2}, %{reg1}"), "subpd"); +} + +TEST_F(AssemblerX86Test, MulPS) { + DriverStr(RepeatFF(&x86::X86Assembler::mulps, "mulps %{reg2}, %{reg1}"), "mulps"); +} + +TEST_F(AssemblerX86Test, MulPD) { + DriverStr(RepeatFF(&x86::X86Assembler::mulpd, "mulpd %{reg2}, %{reg1}"), "mulpd"); +} + +TEST_F(AssemblerX86Test, DivPS) { + DriverStr(RepeatFF(&x86::X86Assembler::divps, "divps %{reg2}, %{reg1}"), "divps"); +} + +TEST_F(AssemblerX86Test, DivPD) { + DriverStr(RepeatFF(&x86::X86Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd"); +} + +TEST_F(AssemblerX86Test, PAddB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb"); +} + +TEST_F(AssemblerX86Test, PSubB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb"); +} + +TEST_F(AssemblerX86Test, PAddW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw"); +} + +TEST_F(AssemblerX86Test, PSubW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw"); +} + +TEST_F(AssemblerX86Test, PMullW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw"); +} + +TEST_F(AssemblerX86Test, PAddD) { + DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd"); +} + +TEST_F(AssemblerX86Test, PSubD) { + DriverStr(RepeatFF(&x86::X86Assembler::psubd, "psubd %{reg2}, %{reg1}"), "psubd"); +} + +TEST_F(AssemblerX86Test, PMullD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld"); +} + +TEST_F(AssemblerX86Test, PAddQ) { + DriverStr(RepeatFF(&x86::X86Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq"); +} + +TEST_F(AssemblerX86Test, PSubQ) { + DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); +} + +TEST_F(AssemblerX86Test, XorPD) { + DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); +} + +TEST_F(AssemblerX86Test, XorPS) { + DriverStr(RepeatFF(&x86::X86Assembler::xorps, 
"xorps %{reg2}, %{reg1}"), "xorps"); +} + +TEST_F(AssemblerX86Test, PXor) { + DriverStr(RepeatFF(&x86::X86Assembler::pxor, "pxor %{reg2}, %{reg1}"), "pxor"); +} + +TEST_F(AssemblerX86Test, AndPD) { + DriverStr(RepeatFF(&x86::X86Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd"); +} + +TEST_F(AssemblerX86Test, AndPS) { + DriverStr(RepeatFF(&x86::X86Assembler::andps, "andps %{reg2}, %{reg1}"), "andps"); +} + +TEST_F(AssemblerX86Test, PAnd) { + DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); +} + +TEST_F(AssemblerX86Test, AndnPD) { + DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); +} + +TEST_F(AssemblerX86Test, AndnPS) { + DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps"); +} + +TEST_F(AssemblerX86Test, PAndn) { + DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn"); +} + +TEST_F(AssemblerX86Test, OrPD) { + DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd"); +} + +TEST_F(AssemblerX86Test, OrPS) { + DriverStr(RepeatFF(&x86::X86Assembler::orps, "orps %{reg2}, %{reg1}"), "orps"); +} + +TEST_F(AssemblerX86Test, POr) { + DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por"); +} + +TEST_F(AssemblerX86Test, PAvgB) { + DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb"); +} + +TEST_F(AssemblerX86Test, PAvgW) { + DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); +} + +TEST_F(AssemblerX86Test, PCmpeqB) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); +} + +TEST_F(AssemblerX86Test, PCmpeqW) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "cmpeqw"); +} + +TEST_F(AssemblerX86Test, PCmpeqD) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "cmpeqd"); +} + +TEST_F(AssemblerX86Test, PCmpeqQ) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "cmpeqq"); +} + +TEST_F(AssemblerX86Test, PCmpgtB) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "cmpgtb"); +} + +TEST_F(AssemblerX86Test, PCmpgtW) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "cmpgtw"); +} + +TEST_F(AssemblerX86Test, PCmpgtD) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "cmpgtd"); +} + +TEST_F(AssemblerX86Test, PCmpgtQ) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "cmpgtq"); +} + +TEST_F(AssemblerX86Test, ShufPS) { + DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps"); +} + +TEST_F(AssemblerX86Test, ShufPD) { + DriverStr(RepeatFFI(&x86::X86Assembler::shufpd, 1, "shufpd ${imm}, %{reg2}, %{reg1}"), "shufpd"); +} + +TEST_F(AssemblerX86Test, PShufD) { + DriverStr(RepeatFFI(&x86::X86Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd"); +} + +TEST_F(AssemblerX86Test, Punpcklbw) { + DriverStr(RepeatFF(&x86::X86Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw"); +} + +TEST_F(AssemblerX86Test, Punpcklwd) { + DriverStr(RepeatFF(&x86::X86Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd"); +} + +TEST_F(AssemblerX86Test, Punpckldq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq"); +} + +TEST_F(AssemblerX86Test, Punpcklqdq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), 
"punpcklqdq"); +} + +TEST_F(AssemblerX86Test, psllw) { + GetAssembler()->psllw(x86::XMM0, CreateImmediate(16)); + DriverStr("psllw $0x10, %xmm0\n", "psllwi"); +} + +TEST_F(AssemblerX86Test, pslld) { + GetAssembler()->pslld(x86::XMM0, CreateImmediate(16)); + DriverStr("pslld $0x10, %xmm0\n", "pslldi"); +} + +TEST_F(AssemblerX86Test, psllq) { + GetAssembler()->psllq(x86::XMM0, CreateImmediate(16)); + DriverStr("psllq $0x10, %xmm0\n", "psllqi"); +} + +TEST_F(AssemblerX86Test, psraw) { + GetAssembler()->psraw(x86::XMM0, CreateImmediate(16)); + DriverStr("psraw $0x10, %xmm0\n", "psrawi"); +} + +TEST_F(AssemblerX86Test, psrad) { + GetAssembler()->psrad(x86::XMM0, CreateImmediate(16)); + DriverStr("psrad $0x10, %xmm0\n", "psradi"); +} + +TEST_F(AssemblerX86Test, psrlw) { + GetAssembler()->psrlw(x86::XMM0, CreateImmediate(16)); + DriverStr("psrlw $0x10, %xmm0\n", "psrlwi"); +} + +TEST_F(AssemblerX86Test, psrld) { + GetAssembler()->psrld(x86::XMM0, CreateImmediate(16)); + DriverStr("psrld $0x10, %xmm0\n", "psrldi"); +} + +TEST_F(AssemblerX86Test, psrlq) { + GetAssembler()->psrlq(x86::XMM0, CreateImmediate(16)); + DriverStr("psrlq $0x10, %xmm0\n", "psrlqi"); +} + +TEST_F(AssemblerX86Test, psrldq) { + GetAssembler()->psrldq(x86::XMM0, CreateImmediate(16)); + DriverStr("psrldq $0x10, %xmm0\n", "psrldqi"); +} + ///////////////// // Near labels // ///////////////// @@ -389,4 +757,10 @@ TEST_F(AssemblerX86Test, NearLabel) { DriverStr(expected, "near_label"); } +TEST_F(AssemblerX86Test, Cmpb) { + GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0)); + const char* expected = "cmpb $0, 128(%EDI)\n"; + DriverStr(expected, "cmpb"); +} + } // namespace art diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h index 2dfb65c479..0bc1560ed7 100644 --- a/compiler/utils/x86/constants_x86.h +++ b/compiler/utils/x86/constants_x86.h @@ -97,6 +97,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven }; diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc new file mode 100644 index 0000000000..cfdf80ba50 --- /dev/null +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -0,0 +1,587 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "jni_macro_assembler_x86.h" + +#include "utils/assembler.h" +#include "base/casts.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "thread.h" + +namespace art { +namespace x86 { + +// Slowpath entered when Thread::Current()->_exception is non-null +class X86ExceptionSlowPath FINAL : public SlowPath { + public: + explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} + virtual void Emit(Assembler *sp_asm) OVERRIDE; + private: + const size_t stack_adjust_; +}; + +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); +} + +constexpr size_t kFramePointerSize = 4; + +#define __ asm_. + +void X86JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> spill_regs, + const ManagedRegisterEntrySpills& entry_spills) { + DCHECK_EQ(CodeSize(), 0U); // Nothing emitted yet. + cfi().SetCurrentCFAOffset(4); // Return address on stack. + CHECK_ALIGNED(frame_size, kStackAlignment); + int gpr_count = 0; + for (int i = spill_regs.size() - 1; i >= 0; --i) { + Register spill = spill_regs[i].AsX86().AsCpuRegister(); + __ pushl(spill); + gpr_count++; + cfi().AdjustCFAOffset(kFramePointerSize); + cfi().RelOffset(DWARFReg(spill), 0); + } + + // return address then method on stack. + int32_t adjust = frame_size - gpr_count * kFramePointerSize - + kFramePointerSize /*method*/ - + kFramePointerSize /*return address*/; + __ addl(ESP, Immediate(-adjust)); + cfi().AdjustCFAOffset(adjust); + __ pushl(method_reg.AsX86().AsCpuRegister()); + cfi().AdjustCFAOffset(kFramePointerSize); + DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size); + + for (size_t i = 0; i < entry_spills.size(); ++i) { + ManagedRegisterSpill spill = entry_spills.at(i); + if (spill.AsX86().IsCpuRegister()) { + int offset = frame_size + spill.getSpillOffset(); + __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); + } else { + DCHECK(spill.AsX86().IsXmmRegister()); + if (spill.getSize() == 8) { + __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); + } else { + CHECK_EQ(spill.getSize(), 4); + __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister()); + } + } + } +} + +void X86JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> spill_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + cfi().RememberState(); + // -kFramePointerSize for ArtMethod*. + int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize; + __ addl(ESP, Immediate(adjust)); + cfi().AdjustCFAOffset(-adjust); + for (size_t i = 0; i < spill_regs.size(); ++i) { + Register spill = spill_regs[i].AsX86().AsCpuRegister(); + __ popl(spill); + cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi().Restore(DWARFReg(spill)); + } + __ ret(); + // The CFI should be restored for any code that follows the exit block. 
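// (Editorial sketch, not part of this patch: to make the CFA bookkeeping in
// BuildFrame/RemoveFrame concrete, take frame_size = 32 with two callee-save GPR
// spills. On entry the return address already accounts for 4 bytes of CFA;
// BuildFrame then pushes the two spills (8 bytes), reserves
//   adjust = 32 - 2*4 /*spills*/ - 4 /*method*/ - 4 /*return address*/ = 16
// bytes with addl $-16, %esp, and finally pushes the ArtMethod*, leaving the CFA
// offset at exactly frame_size = 32. RemoveFrame mirrors this: addl $16, two
// popl's and ret, walking the CFA offset back down at each step.)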
+ cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +void X86JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + __ addl(ESP, Immediate(-adjust)); + cfi().AdjustCFAOffset(adjust); +} + +static void DecreaseFrameSizeImpl(X86Assembler* assembler, size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + assembler->addl(ESP, Immediate(adjust)); + assembler->cfi().AdjustCFAOffset(-adjust); +} + +void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + DecreaseFrameSizeImpl(&asm_, adjust); +} + +void X86JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { + X86ManagedRegister src = msrc.AsX86(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsCpuRegister()) { + CHECK_EQ(4u, size); + __ movl(Address(ESP, offs), src.AsCpuRegister()); + } else if (src.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ movl(Address(ESP, offs), src.AsRegisterPairLow()); + __ movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), src.AsRegisterPairHigh()); + } else if (src.IsX87Register()) { + if (size == 4) { + __ fstps(Address(ESP, offs)); + } else { + __ fstpl(Address(ESP, offs)); + } + } else { + CHECK(src.IsXmmRegister()); + if (size == 4) { + __ movss(Address(ESP, offs), src.AsXmmRegister()); + } else { + __ movsd(Address(ESP, offs), src.AsXmmRegister()); + } + } +} + +void X86JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + X86ManagedRegister src = msrc.AsX86(); + CHECK(src.IsCpuRegister()); + __ movl(Address(ESP, dest), src.AsCpuRegister()); +} + +void X86JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + X86ManagedRegister src = msrc.AsX86(); + CHECK(src.IsCpuRegister()); + __ movl(Address(ESP, dest), src.AsCpuRegister()); +} + +void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) { + __ movl(Address(ESP, dest), Immediate(imm)); +} + +void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + X86ManagedRegister scratch = mscratch.AsX86(); + CHECK(scratch.IsCpuRegister()); + __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs)); + __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister()); +} + +void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { + __ fs()->movl(Address::Absolute(thr_offs), ESP); +} + +void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/, + ManagedRegister /*src*/, + FrameOffset /*in_off*/, + ManagedRegister /*scratch*/) { + UNIMPLEMENTED(FATAL); // this case only currently exists for ARM +} + +void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { + X86ManagedRegister dest = mdest.AsX86(); + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (dest.IsCpuRegister()) { + CHECK_EQ(4u, size); + __ movl(dest.AsCpuRegister(), Address(ESP, src)); + } else if (dest.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ movl(dest.AsRegisterPairLow(), Address(ESP, src)); + __ movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4))); + } else if (dest.IsX87Register()) { + if (size == 4) { + __ flds(Address(ESP, src)); + } else { + __ fldl(Address(ESP, src)); + } + } else { + CHECK(dest.IsXmmRegister()); + if (size == 4) { + __ movss(dest.AsXmmRegister(), Address(ESP, src)); + } else { + __ movsd(dest.AsXmmRegister(), Address(ESP, src)); + } + } +} + +void X86JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, 
size_t size) { + X86ManagedRegister dest = mdest.AsX86(); + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (dest.IsCpuRegister()) { + if (size == 1u) { + __ fs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src)); + } else { + CHECK_EQ(4u, size); + __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src)); + } + } else if (dest.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src)); + __ fs()->movl(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset32(src.Int32Value()+4))); + } else if (dest.IsX87Register()) { + if (size == 4) { + __ fs()->flds(Address::Absolute(src)); + } else { + __ fs()->fldl(Address::Absolute(src)); + } + } else { + CHECK(dest.IsXmmRegister()); + if (size == 4) { + __ fs()->movss(dest.AsXmmRegister(), Address::Absolute(src)); + } else { + __ fs()->movsd(dest.AsXmmRegister(), Address::Absolute(src)); + } + } +} + +void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { + X86ManagedRegister dest = mdest.AsX86(); + CHECK(dest.IsCpuRegister()); + __ movl(dest.AsCpuRegister(), Address(ESP, src)); +} + +void X86JNIMacroAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, + bool unpoison_reference) { + X86ManagedRegister dest = mdest.AsX86(); + CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); + __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); + if (unpoison_reference) { + __ MaybeUnpoisonHeapReference(dest.AsCpuRegister()); + } +} + +void X86JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, + ManagedRegister base, + Offset offs) { + X86ManagedRegister dest = mdest.AsX86(); + CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); + __ movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs)); +} + +void X86JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) { + X86ManagedRegister dest = mdest.AsX86(); + CHECK(dest.IsCpuRegister()); + __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs)); +} + +void X86JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) { + X86ManagedRegister reg = mreg.AsX86(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsCpuRegister()) << reg; + if (size == 1) { + __ movsxb(reg.AsCpuRegister(), reg.AsByteRegister()); + } else { + __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister()); + } +} + +void X86JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) { + X86ManagedRegister reg = mreg.AsX86(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsCpuRegister()) << reg; + if (size == 1) { + __ movzxb(reg.AsCpuRegister(), reg.AsByteRegister()); + } else { + __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister()); + } +} + +void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { + X86ManagedRegister dest = mdest.AsX86(); + X86ManagedRegister src = msrc.AsX86(); + if (!dest.Equals(src)) { + if (dest.IsCpuRegister() && src.IsCpuRegister()) { + __ movl(dest.AsCpuRegister(), src.AsCpuRegister()); + } else if (src.IsX87Register() && dest.IsXmmRegister()) { + // Pass via stack and pop X87 register + __ subl(ESP, Immediate(16)); + if (size == 4) { + CHECK_EQ(src.AsX87Register(), ST0); + __ fstps(Address(ESP, 0)); + __ movss(dest.AsXmmRegister(), Address(ESP, 0)); + } else { + CHECK_EQ(src.AsX87Register(), ST0); + __ fstpl(Address(ESP, 0)); + __ movsd(dest.AsXmmRegister(), Address(ESP, 0)); + } + __ addl(ESP, Immediate(16)); + } else { + // TODO: x87, SSE + UNIMPLEMENTED(FATAL) << ": Move " << 
dest << ", " << src; + } + } +} + +void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { + X86ManagedRegister scratch = mscratch.AsX86(); + CHECK(scratch.IsCpuRegister()); + __ movl(scratch.AsCpuRegister(), Address(ESP, src)); + __ movl(Address(ESP, dest), scratch.AsCpuRegister()); +} + +void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister mscratch) { + X86ManagedRegister scratch = mscratch.AsX86(); + CHECK(scratch.IsCpuRegister()); + __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs)); + Store(fr_offs, scratch, 4); +} + +void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + X86ManagedRegister scratch = mscratch.AsX86(); + CHECK(scratch.IsCpuRegister()); + Load(scratch, fr_offs, 4); + __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister()); +} + +void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, + ManagedRegister mscratch, + size_t size) { + X86ManagedRegister scratch = mscratch.AsX86(); + if (scratch.IsCpuRegister() && size == 8) { + Load(scratch, src, 4); + Store(dest, scratch, 4); + Load(scratch, FrameOffset(src.Int32Value() + 4), 4); + Store(FrameOffset(dest.Int32Value() + 4), scratch, 4); + } else { + Load(scratch, src, size); + Store(dest, scratch, size); + } +} + +void X86JNIMacroAssembler::Copy(FrameOffset /*dst*/, + ManagedRegister /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void X86JNIMacroAssembler::Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) { + CHECK(scratch.IsNoRegister()); + CHECK_EQ(size, 4u); + __ pushl(Address(ESP, src)); + __ popl(Address(dest_base.AsX86().AsCpuRegister(), dest_offset)); +} + +void X86JNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsX86().AsCpuRegister(); + CHECK_EQ(size, 4u); + __ movl(scratch, Address(ESP, src_base)); + __ movl(scratch, Address(scratch, src_offset)); + __ movl(Address(ESP, dest), scratch); +} + +void X86JNIMacroAssembler::Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) { + CHECK_EQ(size, 4u); + CHECK(scratch.IsNoRegister()); + __ pushl(Address(src.AsX86().AsCpuRegister(), src_offset)); + __ popl(Address(dest.AsX86().AsCpuRegister(), dest_offset)); +} + +void X86JNIMacroAssembler::Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + Register scratch = mscratch.AsX86().AsCpuRegister(); + CHECK_EQ(size, 4u); + CHECK_EQ(dest.Int32Value(), src.Int32Value()); + __ movl(scratch, Address(ESP, src)); + __ pushl(Address(scratch, src_offset)); + __ popl(Address(scratch, dest_offset)); +} + +void X86JNIMacroAssembler::MemoryBarrier(ManagedRegister) { + __ mfence(); +} + +void X86JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { + X86ManagedRegister out_reg = mout_reg.AsX86(); + X86ManagedRegister in_reg = min_reg.AsX86(); + CHECK(in_reg.IsCpuRegister()); + CHECK(out_reg.IsCpuRegister()); + VerifyObject(in_reg, null_allowed); + if (null_allowed) { + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ 
xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); + } + __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); + __ j(kZero, &null_arg); + __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset)); + __ Bind(&null_arg); + } else { + __ leal(out_reg.AsCpuRegister(), Address(ESP, handle_scope_offset)); + } +} + +void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + X86ManagedRegister scratch = mscratch.AsX86(); + CHECK(scratch.IsCpuRegister()); + if (null_allowed) { + Label null_arg; + __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); + __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister()); + __ j(kZero, &null_arg); + __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); + __ Bind(&null_arg); + } else { + __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset)); + } + Store(out_off, scratch, 4); +} + +// Given a handle scope entry, load the associated reference. +void X86JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, + ManagedRegister min_reg) { + X86ManagedRegister out_reg = mout_reg.AsX86(); + X86ManagedRegister in_reg = min_reg.AsX86(); + CHECK(out_reg.IsCpuRegister()); + CHECK(in_reg.IsCpuRegister()); + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); + } + __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); + __ j(kZero, &null_arg); + __ movl(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0)); + __ Bind(&null_arg); +} + +void X86JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references +} + +void X86JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references +} + +void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) { + X86ManagedRegister base = mbase.AsX86(); + CHECK(base.IsCpuRegister()); + __ call(Address(base.AsCpuRegister(), offset.Int32Value())); + // TODO: place reference map on call +} + +void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { + Register scratch = mscratch.AsX86().AsCpuRegister(); + __ movl(scratch, Address(ESP, base)); + __ call(Address(scratch, offset)); +} + +void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) { + __ fs()->call(Address::Absolute(offset)); +} + +void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + __ fs()->movl(tr.AsX86().AsCpuRegister(), + Address::Absolute(Thread::SelfOffset<kX86PointerSize>())); +} + +void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset, + ManagedRegister mscratch) { + X86ManagedRegister scratch = mscratch.AsX86(); + __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>())); + __ movl(Address(ESP, offset), scratch.AsCpuRegister()); +} + +void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) { + X86ExceptionSlowPath* slow = new (__ GetArena()) X86ExceptionSlowPath(stack_adjust); + __ GetBuffer()->EnqueueSlowPath(slow); + __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0)); + __ j(kNotEqual, slow->Entry()); +} + +std::unique_ptr<JNIMacroLabel> X86JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new X86JNIMacroLabel()); +} + +void 
X86JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ jmp(X86JNIMacroLabel::Cast(label)->AsX86()); +} + +void X86JNIMacroAssembler::Jump(JNIMacroLabel* label, + JNIMacroUnaryCondition condition, + ManagedRegister test) { + CHECK(label != nullptr); + + art::x86::Condition x86_cond; + switch (condition) { + case JNIMacroUnaryCondition::kZero: + x86_cond = art::x86::kZero; + break; + case JNIMacroUnaryCondition::kNotZero: + x86_cond = art::x86::kNotZero; + break; + default: + LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition); + UNREACHABLE(); + } + + // TEST reg, reg + // Jcc <Offset> + __ testl(test.AsX86().AsCpuRegister(), test.AsX86().AsCpuRegister()); + __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86()); + + + // X86 also has JCZX, JECZX, however it's not worth it to implement + // because we aren't likely to codegen with ECX+kZero check. +} + +void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(X86JNIMacroLabel::Cast(label)->AsX86()); +} + +#undef __ + +void X86ExceptionSlowPath::Emit(Assembler *sasm) { + X86Assembler* sp_asm = down_cast<X86Assembler*>(sasm); +#define __ sp_asm-> + __ Bind(&entry_); + // Note: the return value is dead + if (stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSizeImpl(sp_asm, stack_adjust_); + } + // Pass exception as argument in EAX + __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>())); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, pDeliverException))); + // this call should never return + __ int3(); +#undef __ +} + +} // namespace x86 +} // namespace art diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h new file mode 100644 index 0000000000..8ffda6425e --- /dev/null +++ b/compiler/utils/x86/jni_macro_assembler_x86.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_ +#define ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_ + +#include <vector> + +#include "assembler_x86.h" +#include "base/arena_containers.h" +#include "base/array_ref.h" +#include "base/enums.h" +#include "base/macros.h" +#include "offsets.h" +#include "utils/jni_macro_assembler.h" + +namespace art { +namespace x86 { + +class X86JNIMacroLabel; + +class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> { + public: + explicit X86JNIMacroAssembler(ArenaAllocator* arena) : JNIMacroAssemblerFwd(arena) {} + virtual ~X86JNIMacroAssembler() {} + + // + // Overridden common assembler high-level functionality + // + + // Emit code that will create an activation on the stack + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + + void StoreStackOffsetToThread(ThreadOffset32 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + + void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + + void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE; + + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + + void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, + bool unpoison_reference) OVERRIDE; + + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + + // Copying routines + void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset32 thr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, + ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, + ManagedRegister scratch, 
size_t size) OVERRIDE; + + void MemoryBarrier(ManagedRegister) OVERRIDE; + + // Sign extension + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current() + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, + ManagedRegister in_reg, bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, + ManagedRegister scratch, bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset] + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) OVERRIDE; + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler); +}; + +class X86JNIMacroLabel FINAL + : public JNIMacroLabelCommon<X86JNIMacroLabel, + art::Label, + kX86> { + public: + art::Label* AsX86() { + return AsPlatformLabel(); + } +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_UTILS_X86_JNI_MACRO_ASSEMBLER_X86_H_ diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index fc20d7e208..c0c2b650e9 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -89,64 +89,64 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86ManagedRegister : public ManagedRegister { public: - ByteRegister AsByteRegister() const { + constexpr ByteRegister AsByteRegister() const { CHECK(IsCpuRegister()); CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers. 
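// (Editorial aside, not part of this patch: the accessors in this class all index
// one flat id space: CPU registers first, then XMM, X87 and register pairs, each
// block kNumberOf*RegIds wide (8 per class on x86-32). Assuming that layout:
//   X86ManagedRegister::FromCpuRegister(EBX).AsCpuRegister() == EBX    // id 3
//   X86ManagedRegister::FromXmmRegister(XMM2).AsXmmRegister() == XMM2  // id 2 + kNumberOfCpuRegIds
// The byte-register restriction checked just above exists because, in 32-bit
// encodings, the byte-register numbers that would correspond to ESP, EBP, ESI and
// EDI actually name AH, CH, DH and BH.)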
return static_cast<ByteRegister>(id_); } - Register AsCpuRegister() const { + constexpr Register AsCpuRegister() const { CHECK(IsCpuRegister()); return static_cast<Register>(id_); } - XmmRegister AsXmmRegister() const { + constexpr XmmRegister AsXmmRegister() const { CHECK(IsXmmRegister()); return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds); } - X87Register AsX87Register() const { + constexpr X87Register AsX87Register() const { CHECK(IsX87Register()); return static_cast<X87Register>(id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds)); } - Register AsRegisterPairLow() const { + constexpr Register AsRegisterPairLow() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdLow(). return FromRegId(AllocIdLow()).AsCpuRegister(); } - Register AsRegisterPairHigh() const { + constexpr Register AsRegisterPairHigh() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdHigh(). return FromRegId(AllocIdHigh()).AsCpuRegister(); } - RegisterPair AsRegisterPair() const { + constexpr RegisterPair AsRegisterPair() const { CHECK(IsRegisterPair()); return static_cast<RegisterPair>(id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds)); } - bool IsCpuRegister() const { + constexpr bool IsCpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfCpuRegIds); } - bool IsXmmRegister() const { + constexpr bool IsXmmRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfCpuRegIds; return (0 <= test) && (test < kNumberOfXmmRegIds); } - bool IsX87Register() const { + constexpr bool IsX87Register() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds); return (0 <= test) && (test < kNumberOfX87RegIds); } - bool IsRegisterPair() const { + constexpr bool IsRegisterPair() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds); @@ -160,33 +160,33 @@ class X86ManagedRegister : public ManagedRegister { // then false is returned. 
bool Overlaps(const X86ManagedRegister& other) const; - static X86ManagedRegister FromCpuRegister(Register r) { + static constexpr X86ManagedRegister FromCpuRegister(Register r) { CHECK_NE(r, kNoRegister); return FromRegId(r); } - static X86ManagedRegister FromXmmRegister(XmmRegister r) { + static constexpr X86ManagedRegister FromXmmRegister(XmmRegister r) { CHECK_NE(r, kNoXmmRegister); return FromRegId(r + kNumberOfCpuRegIds); } - static X86ManagedRegister FromX87Register(X87Register r) { + static constexpr X86ManagedRegister FromX87Register(X87Register r) { CHECK_NE(r, kNoX87Register); return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds); } - static X86ManagedRegister FromRegisterPair(RegisterPair r) { + static constexpr X86ManagedRegister FromRegisterPair(RegisterPair r) { CHECK_NE(r, kNoRegisterPair); return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds)); } private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } - int RegId() const { + constexpr int RegId() const { CHECK(!IsNoRegister()); return id_; } @@ -202,9 +202,9 @@ class X86ManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr X86ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static X86ManagedRegister FromRegId(int reg_id) { + static constexpr X86ManagedRegister FromRegId(int reg_id) { X86ManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -215,7 +215,7 @@ std::ostream& operator<<(std::ostream& os, const X86ManagedRegister& reg); } // namespace x86 -inline x86::X86ManagedRegister ManagedRegister::AsX86() const { +constexpr inline x86::X86ManagedRegister ManagedRegister::AsX86() const { x86::X86ManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 32eb4a37bf..1b7a4850db 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -386,6 +386,42 @@ void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::movaps(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movups(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x10); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movaps(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x29); + EmitOperand(src.LowBits(), dst); +} + + +void X86_64Assembler::movups(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x11); + EmitOperand(src.LowBits(), dst); +} + + void X86_64Assembler::movss(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -539,6 +575,42 @@ void X86_64Assembler::divss(XmmRegister dst, const Address& src) { } +void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); 
+ EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x58); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x59); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5E); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::flds(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); @@ -560,6 +632,56 @@ void X86_64Assembler::fstps(const Address& dst) { } +void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movapd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x28); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movupd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x10); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movapd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x29); + EmitOperand(src.LowBits(), dst); +} + + +void X86_64Assembler::movupd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x11); + EmitOperand(src.LowBits(), dst); +} + + void X86_64Assembler::movsd(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -670,6 +792,197 @@ void X86_64Assembler::divsd(XmmRegister dst, const Address& src) { } +void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x58); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x59); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5E); + EmitXmmRegisterOperand(dst.LowBits(), src); 
+} + + +void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6F); + EmitOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x7F); + EmitOperand(src.LowBits(), dst); +} + + +void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(src, dst); + EmitUint8(0x0F); + EmitUint8(0x7F); + EmitOperand(src.LowBits(), dst); +} + + +void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFC); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xF8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFD); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xF9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD5); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x40); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); 
+ EmitUint8(0xD4); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFB); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { cvtsi2ss(dst, src, false); } @@ -840,6 +1153,15 @@ void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) { } +void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5B); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -1008,6 +1330,16 @@ void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEF); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::andpd(XmmRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1034,6 +1366,41 @@ void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDB); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x55); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDF); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -1051,6 +1418,275 @@ void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::por(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEB); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE0); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE3); + EmitXmmRegisterOperand(dst.LowBits(), src); +} 
+ +void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x74); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x75); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x76); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x29); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x64); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x65); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x66); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x37); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xC6); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xC6); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x70); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x60); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x61); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void 
X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + void X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); @@ -1224,6 +1860,16 @@ void 
X86_64Assembler::xchgl(CpuRegister reg, const Address& address) { } +void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_int32()); + EmitOptionalRex32(address); + EmitUint8(0x80); + EmitOperand(7, address); + EmitUint8(imm.value() & 0xFF); +} + + void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int32()); @@ -1379,6 +2025,25 @@ void X86_64Assembler::testq(CpuRegister reg, const Address& address) { } +void X86_64Assembler::testb(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst); + EmitUint8(0xF6); + EmitOperand(Register::RAX, dst); + CHECK(imm.is_int8()); + EmitUint8(imm.value() & 0xFF); +} + + +void X86_64Assembler::testl(const Address& dst, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst); + EmitUint8(0xF7); + EmitOperand(0, dst); + EmitImmediate(imm); +} + + void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -2296,6 +2961,12 @@ void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::repne_scasb() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF2); + EmitUint8(0xAE); +} + void X86_64Assembler::repne_scasw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -2303,7 +2974,6 @@ void X86_64Assembler::repne_scasw() { EmitUint8(0xAF); } - void X86_64Assembler::repe_cmpsw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -2629,543 +3299,6 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } -static dwarf::Reg DWARFReg(Register reg) { - return dwarf::Reg::X86_64Core(static_cast<int>(reg)); -} -static dwarf::Reg DWARFReg(FloatRegister reg) { - return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); -} - -constexpr size_t kFramePointerSize = 8; - -void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& spill_regs, - const ManagedRegisterEntrySpills& entry_spills) { - DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. - cfi_.SetCurrentCFAOffset(8); // Return address on stack. - CHECK_ALIGNED(frame_size, kStackAlignment); - int gpr_count = 0; - for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); - if (spill.IsCpuRegister()) { - pushq(spill.AsCpuRegister()); - gpr_count++; - cfi_.AdjustCFAOffset(kFramePointerSize); - cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0); - } - } - // return address then method on stack. 
- int64_t rest_of_frame = static_cast<int64_t>(frame_size) - - (gpr_count * kFramePointerSize) - - kFramePointerSize /*return address*/; - subq(CpuRegister(RSP), Immediate(rest_of_frame)); - cfi_.AdjustCFAOffset(rest_of_frame); - - // spill xmms - int64_t offset = rest_of_frame; - for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); - if (spill.IsXmmRegister()) { - offset -= sizeof(double); - movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); - cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset); - } - } - - DCHECK_EQ(kX86_64PointerSize, kFramePointerSize); - - movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister()); - - for (size_t i = 0; i < entry_spills.size(); ++i) { - ManagedRegisterSpill spill = entry_spills.at(i); - if (spill.AsX86_64().IsCpuRegister()) { - if (spill.getSize() == 8) { - movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), - spill.AsX86_64().AsCpuRegister()); - } else { - CHECK_EQ(spill.getSize(), 4); - movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister()); - } - } else { - if (spill.getSize() == 8) { - movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister()); - } else { - CHECK_EQ(spill.getSize(), 4); - movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister()); - } - } - } -} - -void X86_64Assembler::RemoveFrame(size_t frame_size, - const std::vector<ManagedRegister>& spill_regs) { - CHECK_ALIGNED(frame_size, kStackAlignment); - cfi_.RememberState(); - int gpr_count = 0; - // unspill xmms - int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; - for (size_t i = 0; i < spill_regs.size(); ++i) { - x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); - if (spill.IsXmmRegister()) { - offset += sizeof(double); - movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); - cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister())); - } else { - gpr_count++; - } - } - int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize; - addq(CpuRegister(RSP), Immediate(adjust)); - cfi_.AdjustCFAOffset(-adjust); - for (size_t i = 0; i < spill_regs.size(); ++i) { - x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); - if (spill.IsCpuRegister()) { - popq(spill.AsCpuRegister()); - cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); - cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister())); - } - } - ret(); - // The CFI should be restored for any code that follows the exit block. 
- cfi_.RestoreState(); - cfi_.DefCFAOffset(frame_size); -} - -void X86_64Assembler::IncreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); - cfi_.AdjustCFAOffset(adjust); -} - -void X86_64Assembler::DecreaseFrameSize(size_t adjust) { - CHECK_ALIGNED(adjust, kStackAlignment); - addq(CpuRegister(RSP), Immediate(adjust)); - cfi_.AdjustCFAOffset(-adjust); -} - -void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { - X86_64ManagedRegister src = msrc.AsX86_64(); - if (src.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (src.IsCpuRegister()) { - if (size == 4) { - CHECK_EQ(4u, size); - movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister()); - } else { - CHECK_EQ(8u, size); - movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister()); - } - } else if (src.IsRegisterPair()) { - CHECK_EQ(0u, size); - movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow()); - movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)), - src.AsRegisterPairHigh()); - } else if (src.IsX87Register()) { - if (size == 4) { - fstps(Address(CpuRegister(RSP), offs)); - } else { - fstpl(Address(CpuRegister(RSP), offs)); - } - } else { - CHECK(src.IsXmmRegister()); - if (size == 4) { - movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister()); - } else { - movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister()); - } - } -} - -void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - X86_64ManagedRegister src = msrc.AsX86_64(); - CHECK(src.IsCpuRegister()); - movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); -} - -void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { - X86_64ManagedRegister src = msrc.AsX86_64(); - CHECK(src.IsCpuRegister()); - movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); -} - -void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister) { - movl(Address(CpuRegister(RSP), dest), Immediate(imm)); // TODO(64) movq? -} - -void X86_64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister) { - gs()->movl(Address::Absolute(dest, true), Immediate(imm)); // TODO(64) movq? 
-} - -void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs)); - gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister()); -} - -void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { - gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP)); -} - -void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/, - FrameOffset /*in_off*/, ManagedRegister /*scratch*/) { - UNIMPLEMENTED(FATAL); // this case only currently exists for ARM -} - -void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (dest.IsCpuRegister()) { - if (size == 4) { - CHECK_EQ(4u, size); - movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); - } else { - CHECK_EQ(8u, size); - movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); - } - } else if (dest.IsRegisterPair()) { - CHECK_EQ(0u, size); - movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src)); - movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4))); - } else if (dest.IsX87Register()) { - if (size == 4) { - flds(Address(CpuRegister(RSP), src)); - } else { - fldl(Address(CpuRegister(RSP), src)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src)); - } else { - movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src)); - } - } -} - -void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - if (dest.IsNoRegister()) { - CHECK_EQ(0u, size); - } else if (dest.IsCpuRegister()) { - CHECK_EQ(4u, size); - gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true)); - } else if (dest.IsRegisterPair()) { - CHECK_EQ(8u, size); - gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true)); - } else if (dest.IsX87Register()) { - if (size == 4) { - gs()->flds(Address::Absolute(src, true)); - } else { - gs()->fldl(Address::Absolute(src, true)); - } - } else { - CHECK(dest.IsXmmRegister()); - if (size == 4) { - gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true)); - } else { - gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true)); - } - } -} - -void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister()); - movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); -} - -void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs)); - if (unpoison_reference) { - MaybeUnpoisonHeapReference(dest.AsCpuRegister()); - } -} - -void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs)); -} - -void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> 
offs) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister()); - gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true)); -} - -void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) { - X86_64ManagedRegister reg = mreg.AsX86_64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsCpuRegister()) << reg; - if (size == 1) { - movsxb(reg.AsCpuRegister(), reg.AsCpuRegister()); - } else { - movsxw(reg.AsCpuRegister(), reg.AsCpuRegister()); - } -} - -void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) { - X86_64ManagedRegister reg = mreg.AsX86_64(); - CHECK(size == 1 || size == 2) << size; - CHECK(reg.IsCpuRegister()) << reg; - if (size == 1) { - movzxb(reg.AsCpuRegister(), reg.AsCpuRegister()); - } else { - movzxw(reg.AsCpuRegister(), reg.AsCpuRegister()); - } -} - -void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { - X86_64ManagedRegister dest = mdest.AsX86_64(); - X86_64ManagedRegister src = msrc.AsX86_64(); - if (!dest.Equals(src)) { - if (dest.IsCpuRegister() && src.IsCpuRegister()) { - movq(dest.AsCpuRegister(), src.AsCpuRegister()); - } else if (src.IsX87Register() && dest.IsXmmRegister()) { - // Pass via stack and pop X87 register - subl(CpuRegister(RSP), Immediate(16)); - if (size == 4) { - CHECK_EQ(src.AsX87Register(), ST0); - fstps(Address(CpuRegister(RSP), 0)); - movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0)); - } else { - CHECK_EQ(src.AsX87Register(), ST0); - fstpl(Address(CpuRegister(RSP), 0)); - movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0)); - } - addq(CpuRegister(RSP), Immediate(16)); - } else { - // TODO: x87, SSE - UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src; - } - } -} - -void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src)); - movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister()); -} - -void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, - ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true)); - Store(fr_offs, scratch, 8); -} - -void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, - FrameOffset fr_offs, - ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - Load(scratch, fr_offs, 8); - gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister()); -} - -void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, - size_t size) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - if (scratch.IsCpuRegister() && size == 8) { - Load(scratch, src, 4); - Store(dest, scratch, 4); - Load(scratch, FrameOffset(src.Int32Value() + 4), 4); - Store(FrameOffset(dest.Int32Value() + 4), scratch, 4); - } else { - Load(scratch, src, size); - Store(dest, scratch, size); - } -} - -void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*scratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL); -} - -void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister scratch, size_t size) { - CHECK(scratch.IsNoRegister()); - CHECK_EQ(size, 4u); - 
pushq(Address(CpuRegister(RSP), src)); - popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset)); -} - -void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, - ManagedRegister mscratch, size_t size) { - CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); - CHECK_EQ(size, 4u); - movq(scratch, Address(CpuRegister(RSP), src_base)); - movq(scratch, Address(scratch, src_offset)); - movq(Address(CpuRegister(RSP), dest), scratch); -} - -void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset, - ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) { - CHECK_EQ(size, 4u); - CHECK(scratch.IsNoRegister()); - pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset)); - popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset)); -} - -void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister mscratch, size_t size) { - CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); - CHECK_EQ(size, 4u); - CHECK_EQ(dest.Int32Value(), src.Int32Value()); - movq(scratch, Address(CpuRegister(RSP), src)); - pushq(Address(scratch, src_offset)); - popq(Address(scratch, dest_offset)); -} - -void X86_64Assembler::MemoryBarrier(ManagedRegister) { - mfence(); -} - -void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, - FrameOffset handle_scope_offset, - ManagedRegister min_reg, bool null_allowed) { - X86_64ManagedRegister out_reg = mout_reg.AsX86_64(); - X86_64ManagedRegister in_reg = min_reg.AsX86_64(); - if (in_reg.IsNoRegister()) { // TODO(64): && null_allowed - // Use out_reg as indicator of null. - in_reg = out_reg; - // TODO: movzwl - movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - } - CHECK(in_reg.IsCpuRegister()); - CHECK(out_reg.IsCpuRegister()); - VerifyObject(in_reg, null_allowed); - if (null_allowed) { - Label null_arg; - if (!out_reg.Equals(in_reg)) { - xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); - } - testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); - j(kZero, &null_arg); - leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - Bind(&null_arg); - } else { - leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - } -} - -void X86_64Assembler::CreateHandleScopeEntry(FrameOffset out_off, - FrameOffset handle_scope_offset, - ManagedRegister mscratch, - bool null_allowed) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - CHECK(scratch.IsCpuRegister()); - if (null_allowed) { - Label null_arg; - movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - testl(scratch.AsCpuRegister(), scratch.AsCpuRegister()); - j(kZero, &null_arg); - leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - Bind(&null_arg); - } else { - leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); - } - Store(out_off, scratch, 8); -} - -// Given a handle scope entry, load the associated reference. 
-void X86_64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, - ManagedRegister min_reg) { - X86_64ManagedRegister out_reg = mout_reg.AsX86_64(); - X86_64ManagedRegister in_reg = min_reg.AsX86_64(); - CHECK(out_reg.IsCpuRegister()); - CHECK(in_reg.IsCpuRegister()); - Label null_arg; - if (!out_reg.Equals(in_reg)) { - xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); - } - testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); - j(kZero, &null_arg); - movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0)); - Bind(&null_arg); -} - -void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { - // TODO: not validating references -} - -void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { - // TODO: not validating references -} - -void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) { - X86_64ManagedRegister base = mbase.AsX86_64(); - CHECK(base.IsCpuRegister()); - call(Address(base.AsCpuRegister(), offset.Int32Value())); - // TODO: place reference map on call -} - -void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { - CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); - movq(scratch, Address(CpuRegister(RSP), base)); - call(Address(scratch, offset)); -} - -void X86_64Assembler::CallFromThread64(ThreadOffset<8> offset, ManagedRegister /*mscratch*/) { - gs()->call(Address::Absolute(offset, true)); -} - -void X86_64Assembler::GetCurrentThread(ManagedRegister tr) { - gs()->movq(tr.AsX86_64().AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true)); -} - -void X86_64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) { - X86_64ManagedRegister scratch = mscratch.AsX86_64(); - gs()->movq(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<8>(), true)); - movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister()); -} - -// Slowpath entered when Thread::Current()->_exception is non-null -class X86_64ExceptionSlowPath FINAL : public SlowPath { - public: - explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const size_t stack_adjust_; -}; - -void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) { - X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust); - buffer_.EnqueueSlowPath(slow); - gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0)); - j(kNotEqual, slow->Entry()); -} - -void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { - X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - // Note: the return value is dead - if (stack_adjust_ != 0) { // Fix up the frame. 
- __ DecreaseFrameSize(stack_adjust_); - } - // Pass exception as argument in RDI - __ gs()->movq(CpuRegister(RDI), Address::Absolute(Thread::ExceptionOffset<8>(), true)); - __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), true)); - // this call should never return - __ int3(); -#undef __ -} - void X86_64Assembler::AddConstantArea() { ArrayRef<const int32_t> area = constant_area_.GetBuffer(); for (size_t i = 0, e = area.size(); i < e; i++) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 92c7d0ab99..0ddc46ca44 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -20,14 +20,15 @@ #include <vector> #include "base/arena_containers.h" +#include "base/array_ref.h" #include "base/bit_utils.h" #include "base/macros.h" #include "constants_x86_64.h" #include "globals.h" #include "managed_register_x86_64.h" #include "offsets.h" -#include "utils/array_ref.h" #include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" namespace art { namespace x86_64 { @@ -258,7 +259,7 @@ class Address : public Operand { } // If no_rip is true then the Absolute address isn't RIP relative. - static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) { + static Address Absolute(ThreadOffset64 addr, bool no_rip = false) { return Absolute(addr.Int32Value(), no_rip); } @@ -389,7 +390,11 @@ class X86_64Assembler FINAL : public Assembler { void leaq(CpuRegister dst, const Address& src); void leal(CpuRegister dst, const Address& src); - void movaps(XmmRegister dst, XmmRegister src); + void movaps(XmmRegister dst, XmmRegister src); // move + void movaps(XmmRegister dst, const Address& src); // load aligned + void movups(XmmRegister dst, const Address& src); // load unaligned + void movaps(const Address& dst, XmmRegister src); // store aligned + void movups(const Address& dst, XmmRegister src); // store unaligned void movss(XmmRegister dst, const Address& src); void movss(const Address& dst, XmmRegister src); @@ -412,6 +417,17 @@ class X86_64Assembler FINAL : public Assembler { void divss(XmmRegister dst, XmmRegister src); void divss(XmmRegister dst, const Address& src); + void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void subps(XmmRegister dst, XmmRegister src); + void mulps(XmmRegister dst, XmmRegister src); + void divps(XmmRegister dst, XmmRegister src); + + void movapd(XmmRegister dst, XmmRegister src); // move + void movapd(XmmRegister dst, const Address& src); // load aligned + void movupd(XmmRegister dst, const Address& src); // load unaligned + void movapd(const Address& dst, XmmRegister src); // store aligned + void movupd(const Address& dst, XmmRegister src); // store unaligned + void movsd(XmmRegister dst, const Address& src); void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); @@ -425,6 +441,31 @@ class X86_64Assembler FINAL : public Assembler { void divsd(XmmRegister dst, XmmRegister src); void divsd(XmmRegister dst, const Address& src); + void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void subpd(XmmRegister dst, XmmRegister src); + void mulpd(XmmRegister dst, XmmRegister src); + void divpd(XmmRegister dst, XmmRegister src); + + void movdqa(XmmRegister dst, XmmRegister src); // move + void movdqa(XmmRegister dst, const Address& src); // load aligned + void movdqu(XmmRegister dst, const Address& src); // load unaligned + void movdqa(const Address& dst, 
XmmRegister src); // store aligned + void movdqu(const Address& dst, XmmRegister src); // store unaligned + + void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void psubb(XmmRegister dst, XmmRegister src); + + void paddw(XmmRegister dst, XmmRegister src); + void psubw(XmmRegister dst, XmmRegister src); + void pmullw(XmmRegister dst, XmmRegister src); + + void paddd(XmmRegister dst, XmmRegister src); + void psubd(XmmRegister dst, XmmRegister src); + void pmulld(XmmRegister dst, XmmRegister src); + + void paddq(XmmRegister dst, XmmRegister src); + void psubq(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version. void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit); void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit); @@ -445,6 +486,7 @@ class X86_64Assembler FINAL : public Assembler { void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version. void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit); + void cvtdq2ps(XmmRegister dst, XmmRegister src); void cvtdq2pd(XmmRegister dst, XmmRegister src); void comiss(XmmRegister a, XmmRegister b); @@ -466,13 +508,54 @@ class X86_64Assembler FINAL : public Assembler { void xorpd(XmmRegister dst, XmmRegister src); void xorps(XmmRegister dst, const Address& src); void xorps(XmmRegister dst, XmmRegister src); + void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andpd(XmmRegister dst, const Address& src); void andpd(XmmRegister dst, XmmRegister src); - void andps(XmmRegister dst, XmmRegister src); + void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pand(XmmRegister dst, XmmRegister src); - void orpd(XmmRegister dst, XmmRegister src); + void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andnps(XmmRegister dst, XmmRegister src); + void pandn(XmmRegister dst, XmmRegister src); + + void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void orps(XmmRegister dst, XmmRegister src); + void por(XmmRegister dst, XmmRegister src); + + void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pavgw(XmmRegister dst, XmmRegister src); + + void pcmpeqb(XmmRegister dst, XmmRegister src); + void pcmpeqw(XmmRegister dst, XmmRegister src); + void pcmpeqd(XmmRegister dst, XmmRegister src); + void pcmpeqq(XmmRegister dst, XmmRegister src); + + void pcmpgtb(XmmRegister dst, XmmRegister src); + void pcmpgtw(XmmRegister dst, XmmRegister src); + void pcmpgtd(XmmRegister dst, XmmRegister src); + void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2 + + void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); + void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); + + void punpcklbw(XmmRegister dst, XmmRegister src); + void punpcklwd(XmmRegister dst, XmmRegister src); + void punpckldq(XmmRegister dst, XmmRegister src); + void punpcklqdq(XmmRegister dst, XmmRegister src); + + void psllw(XmmRegister reg, const Immediate& shift_count); + void pslld(XmmRegister reg, const Immediate& shift_count); + void psllq(XmmRegister reg, const Immediate& shift_count); + + void psraw(XmmRegister reg, const Immediate& shift_count); + void psrad(XmmRegister reg, const Immediate& shift_count); + // no psraq + + void psrlw(XmmRegister reg, const Immediate& shift_count); + void psrld(XmmRegister reg, const Immediate& 
shift_count); + void psrlq(XmmRegister reg, const Immediate& shift_count); void flds(const Address& src); void fstps(const Address& dst); @@ -506,6 +589,7 @@ class X86_64Assembler FINAL : public Assembler { void xchgq(CpuRegister dst, CpuRegister src); void xchgl(CpuRegister reg, const Address& address); + void cmpb(const Address& address, const Immediate& imm); void cmpw(const Address& address, const Immediate& imm); void cmpl(CpuRegister reg, const Immediate& imm); @@ -526,6 +610,9 @@ class X86_64Assembler FINAL : public Assembler { void testq(CpuRegister reg1, CpuRegister reg2); void testq(CpuRegister reg, const Address& address); + void testb(const Address& address, const Immediate& imm); + void testl(const Address& address, const Immediate& imm); + void andl(CpuRegister dst, const Immediate& imm); void andl(CpuRegister dst, CpuRegister src); void andl(CpuRegister reg, const Address& address); @@ -665,6 +752,7 @@ class X86_64Assembler FINAL : public Assembler { void rolq(CpuRegister reg, const Immediate& imm); void rolq(CpuRegister operand, CpuRegister shifter); + void repne_scasb(); void repne_scasw(); void repe_cmpsw(); void repe_cmpsl(); @@ -698,124 +786,6 @@ class X86_64Assembler FINAL : public Assembler { } void Bind(NearLabel* label); - // - // Overridden common assembler high-level functionality - // - - // Emit code that will create an activation on the stack - void BuildFrame(size_t frame_size, ManagedRegister method_reg, - const std::vector<ManagedRegister>& callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - - // Emit code that will remove an activation from the stack - void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) - OVERRIDE; - - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; - - // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; - - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch) - OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; - - void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; - - // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - - void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE; - - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; - - void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - - void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE; - - // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size); - - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, - ManagedRegister scratch) OVERRIDE; - - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - - void 
Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - - void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; - - void MemoryBarrier(ManagedRegister) OVERRIDE; - - // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Zero extension - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - - // Exploit fast access in managed code to Thread::Current() - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; - - // Set up out_reg to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. in_reg holds a possibly stale reference - // that can be used to avoid loading the handle scope entry to see if the value is - // null. - void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; - - // Set up out_off to hold a Object** into the handle scope, or to be null if the - // value is null and null_allowed. - void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; - - // src holds a handle scope entry (Object**) load this into dst - virtual void LoadReferenceFromHandleScope(ManagedRegister dst, - ManagedRegister src); - - // Heap::VerifyObject on src. In some cases (such as a reference to this) we - // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - - // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE; - - // Generate code to check if Thread::Current()->exception_ is non-null - // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - // Add a double to the constant area, returning the offset into // the constant area where the literal resides. size_t AddDouble(double v) { return constant_area_.AddDouble(v); } @@ -857,6 +827,12 @@ class X86_64Assembler FINAL : public Assembler { void PoisonHeapReference(CpuRegister reg) { negl(reg); } // Unpoison a heap reference contained in `reg`. void UnpoisonHeapReference(CpuRegister reg) { negl(reg); } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(CpuRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
void MaybeUnpoisonHeapReference(CpuRegister reg) { if (kPoisonHeapReferences) { diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index b19e616dd6..e7d8401e29 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -22,7 +22,9 @@ #include "base/bit_utils.h" #include "base/stl_util.h" +#include "jni_macro_assembler_x86_64.h" #include "utils/assembler_test.h" +#include "utils/jni_macro_assembler_test.h" namespace art { @@ -37,7 +39,7 @@ TEST(AssemblerX86_64, CreateBuffer) { ASSERT_EQ(static_cast<size_t>(5), buffer.Size()); } -#ifdef __ANDROID__ +#ifdef ART_TARGET_ANDROID static constexpr size_t kRandomIterations = 1000; // Devices might be puny, don't stress them... #else static constexpr size_t kRandomIterations = 100000; // Hosts are pretty powerful. @@ -954,6 +956,12 @@ TEST_F(AssemblerX86_64Test, Xorq) { DriverStr(expected, "xorq"); } +TEST_F(AssemblerX86_64Test, RepneScasb) { + GetAssembler()->repne_scasb(); + const char* expected = "repne scasb\n"; + DriverStr(expected, "repne_scasb"); +} + TEST_F(AssemblerX86_64Test, RepneScasw) { GetAssembler()->repne_scasw(); const char* expected = "repne scasw\n"; @@ -978,14 +986,76 @@ TEST_F(AssemblerX86_64Test, Movaps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps"); } +TEST_F(AssemblerX86_64Test, MovapsAddr) { + GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movaps(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movaps 0x4(%RSP), %xmm0\n" + "movaps %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movaps_address"); +} + +TEST_F(AssemblerX86_64Test, MovupsAddr) { + GetAssembler()->movups(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movups(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movups 0x4(%RSP), %xmm0\n" + "movups %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movups_address"); +} + TEST_F(AssemblerX86_64Test, Movss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::movss, "movss %{reg2}, %{reg1}"), "movss"); } +TEST_F(AssemblerX86_64Test, Movapd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::movapd, "movapd %{reg2}, %{reg1}"), "movapd"); +} + +TEST_F(AssemblerX86_64Test, MovapdAddr) { + GetAssembler()->movapd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movapd(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movapd 0x4(%RSP), %xmm0\n" + "movapd %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movapd_address"); +} + +TEST_F(AssemblerX86_64Test, MovupdAddr) { + GetAssembler()->movupd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movupd(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movupd 0x4(%RSP), %xmm0\n" + "movupd %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movupd_address"); +} + TEST_F(AssemblerX86_64Test, Movsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::movsd, "movsd %{reg2}, %{reg1}"), "movsd"); } +TEST_F(AssemblerX86_64Test, Movdqa) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movapd"); +} + 
+TEST_F(AssemblerX86_64Test, MovdqaAddr) { + GetAssembler()->movdqa(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movdqa(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movdqa 0x4(%RSP), %xmm0\n" + "movdqa %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movdqa_address"); +} + +TEST_F(AssemblerX86_64Test, MovdquAddr) { + GetAssembler()->movdqu(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4)); + GetAssembler()->movdqu(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1)); + const char* expected = + "movdqu 0x4(%RSP), %xmm0\n" + "movdqu %xmm1, 0x2(%RSP)\n"; + DriverStr(expected, "movdqu_address"); +} + TEST_F(AssemblerX86_64Test, Movd1) { DriverStr(RepeatFR(&x86_64::X86_64Assembler::movd, "movd %{reg2}, %{reg1}"), "movd.1"); } @@ -1002,6 +1072,14 @@ TEST_F(AssemblerX86_64Test, Addsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::addsd, "addsd %{reg2}, %{reg1}"), "addsd"); } +TEST_F(AssemblerX86_64Test, Addps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::addps, "addps %{reg2}, %{reg1}"), "addps"); +} + +TEST_F(AssemblerX86_64Test, Addpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::addpd, "addpd %{reg2}, %{reg1}"), "addpd"); +} + TEST_F(AssemblerX86_64Test, Subss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::subss, "subss %{reg2}, %{reg1}"), "subss"); } @@ -1010,6 +1088,14 @@ TEST_F(AssemblerX86_64Test, Subsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::subsd, "subsd %{reg2}, %{reg1}"), "subsd"); } +TEST_F(AssemblerX86_64Test, Subps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::subps, "subps %{reg2}, %{reg1}"), "subps"); +} + +TEST_F(AssemblerX86_64Test, Subpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::subpd, "subpd %{reg2}, %{reg1}"), "subpd"); +} + TEST_F(AssemblerX86_64Test, Mulss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulss, "mulss %{reg2}, %{reg1}"), "mulss"); } @@ -1018,6 +1104,14 @@ TEST_F(AssemblerX86_64Test, Mulsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulsd, "mulsd %{reg2}, %{reg1}"), "mulsd"); } +TEST_F(AssemblerX86_64Test, Mulps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulps, "mulps %{reg2}, %{reg1}"), "mulps"); +} + +TEST_F(AssemblerX86_64Test, Mulpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulpd, "mulpd %{reg2}, %{reg1}"), "mulpd"); +} + TEST_F(AssemblerX86_64Test, Divss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::divss, "divss %{reg2}, %{reg1}"), "divss"); } @@ -1026,6 +1120,54 @@ TEST_F(AssemblerX86_64Test, Divsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::divsd, "divsd %{reg2}, %{reg1}"), "divsd"); } +TEST_F(AssemblerX86_64Test, Divps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::divps, "divps %{reg2}, %{reg1}"), "divps"); +} + +TEST_F(AssemblerX86_64Test, Divpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd"); +} + +TEST_F(AssemblerX86_64Test, Paddb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb"); +} + +TEST_F(AssemblerX86_64Test, Psubb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb"); +} + +TEST_F(AssemblerX86_64Test, Paddw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw"); +} + +TEST_F(AssemblerX86_64Test, Psubw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw"); +} + 
+TEST_F(AssemblerX86_64Test, Pmullw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw"); +} + +TEST_F(AssemblerX86_64Test, Paddd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd"); +} + +TEST_F(AssemblerX86_64Test, Psubd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubd, "psubd %{reg2}, %{reg1}"), "psubd"); +} + +TEST_F(AssemblerX86_64Test, Pmulld) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld"); +} + +TEST_F(AssemblerX86_64Test, Paddq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq"); +} + +TEST_F(AssemblerX86_64Test, Psubq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); +} + TEST_F(AssemblerX86_64Test, Cvtsi2ss) { DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss"); } @@ -1063,6 +1205,10 @@ TEST_F(AssemblerX86_64Test, Cvtsd2ss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss"); } +TEST_F(AssemblerX86_64Test, Cvtdq2ps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps"); +} + TEST_F(AssemblerX86_64Test, Cvtdq2pd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd"); } @@ -1107,6 +1253,10 @@ TEST_F(AssemblerX86_64Test, Xorpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } +TEST_F(AssemblerX86_64Test, Pxor) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pxor, "pxor %{reg2}, %{reg1}"), "pxor"); +} + TEST_F(AssemblerX86_64Test, Andps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::andps, "andps %{reg2}, %{reg1}"), "andps"); } @@ -1115,6 +1265,22 @@ TEST_F(AssemblerX86_64Test, Andpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd"); } +TEST_F(AssemblerX86_64Test, Pand) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); +} + +TEST_F(AssemblerX86_64Test, andnpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); +} + +TEST_F(AssemblerX86_64Test, andnps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps"); +} + +TEST_F(AssemblerX86_64Test, Pandn) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn"); +} + TEST_F(AssemblerX86_64Test, Orps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps"); } @@ -1123,6 +1289,134 @@ TEST_F(AssemblerX86_64Test, Orpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd"); } +TEST_F(AssemblerX86_64Test, Por) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por"); +} + +TEST_F(AssemblerX86_64Test, Pavgb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb"); +} + +TEST_F(AssemblerX86_64Test, Pavgw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, 
"pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "pcmpgtb"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "pcmpgtw"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "pcmpgtd"); +} + +TEST_F(AssemblerX86_64Test, PCmpgtq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "pcmpgtq"); +} + +TEST_F(AssemblerX86_64Test, Shufps) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps"); +} + +TEST_F(AssemblerX86_64Test, Shufpd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufpd, 1, "shufpd ${imm}, %{reg2}, %{reg1}"), "shufpd"); +} + +TEST_F(AssemblerX86_64Test, PShufd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd"); +} + +TEST_F(AssemblerX86_64Test, Punpcklbw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw"); +} + +TEST_F(AssemblerX86_64Test, Punpcklwd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd"); +} + +TEST_F(AssemblerX86_64Test, Punpckldq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq"); +} + +TEST_F(AssemblerX86_64Test, Punpcklqdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); +} + +TEST_F(AssemblerX86_64Test, Psllw) { + GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psllw $1, %xmm0\n" + "psllw $2, %xmm15\n", "psllwi"); +} + +TEST_F(AssemblerX86_64Test, Pslld) { + GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("pslld $1, %xmm0\n" + "pslld $2, %xmm15\n", "pslldi"); +} + +TEST_F(AssemblerX86_64Test, Psllq) { + GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psllq $1, %xmm0\n" + "psllq $2, %xmm15\n", "psllqi"); +} + +TEST_F(AssemblerX86_64Test, Psraw) { + GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psraw $1, %xmm0\n" + "psraw $2, %xmm15\n", "psrawi"); +} + +TEST_F(AssemblerX86_64Test, Psrad) { + GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrad $1, %xmm0\n" + "psrad $2, %xmm15\n", "psradi"); +} + +TEST_F(AssemblerX86_64Test, Psrlw) { + GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrlw $1, %xmm0\n" + "psrlw $2, %xmm15\n", "psrlwi"); +} + +TEST_F(AssemblerX86_64Test, Psrld) { + 
GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrld $1, %xmm0\n" + "psrld $2, %xmm15\n", "pslldi"); +} + +TEST_F(AssemblerX86_64Test, Psrlq) { + GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrlq $1, %xmm0\n" + "psrlq $2, %xmm15\n", "pslrqi"); +} + TEST_F(AssemblerX86_64Test, UcomissAddress) { GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); @@ -1485,6 +1779,104 @@ TEST_F(AssemblerX86_64Test, SetCC) { DriverFn(&setcc_test_fn, "setcc"); } +TEST_F(AssemblerX86_64Test, MovzxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb"); +} + +TEST_F(AssemblerX86_64Test, MovsxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); +} + +TEST_F(AssemblerX86_64Test, Repnescasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "Repnescasw"); +} + +TEST_F(AssemblerX86_64Test, Repecmpsw) { + GetAssembler()->repe_cmpsw(); + const char* expected = "repe cmpsw\n"; + DriverStr(expected, "Repecmpsw"); +} + +TEST_F(AssemblerX86_64Test, Repecmpsl) { + GetAssembler()->repe_cmpsl(); + const char* expected = "repe cmpsl\n"; + DriverStr(expected, "Repecmpsl"); +} + +TEST_F(AssemblerX86_64Test, Repecmpsq) { + GetAssembler()->repe_cmpsq(); + const char* expected = "repe cmpsq\n"; + DriverStr(expected, "Repecmpsq"); +} + +TEST_F(AssemblerX86_64Test, Cmpb) { + GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128), + x86_64::Immediate(0)); + const char* expected = "cmpb $0, 128(%RDI)\n"; + DriverStr(expected, "cmpb"); +} + +TEST_F(AssemblerX86_64Test, TestbAddressImmediate) { + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::RBX), + x86_64::TIMES_4, + 12), + x86_64::Immediate(1)); + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)), + x86_64::Immediate(-128)); + GetAssembler()->testb( + x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)), + x86_64::Immediate(127)); + const char* expected = + "testb $1, 0xc(%RDI,%RBX,4)\n" + "testb $-128, 0x7(%RSP)\n" + "testb $127, 0x82(%RBX)\n"; + + DriverStr(expected, "TestbAddressImmediate"); +} + +TEST_F(AssemblerX86_64Test, TestlAddressImmediate) { + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::RBX), + x86_64::TIMES_4, + 12), + x86_64::Immediate(1)); + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)), + x86_64::Immediate(-100000)); + GetAssembler()->testl( + x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)), + x86_64::Immediate(77777777)); + const char* expected = + "testl $1, 0xc(%RDI,%RBX,4)\n" + "testl $-100000, 0x7(%RSP)\n" + "testl $77777777, 0x82(%RBX)\n"; + + DriverStr(expected, "TestlAddressImmediate"); +} + +class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> { + public: + using Base = JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler>; + + protected: + // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... 
+ std::string GetArchitectureString() OVERRIDE { + return "x86_64"; + } + + std::string GetDisassembleParameters() OVERRIDE { + return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn"; + } + + private: +}; + static x86_64::X86_64ManagedRegister ManagedFromCpu(x86_64::Register r) { return x86_64::X86_64ManagedRegister::FromCpuRegister(r); } @@ -1493,14 +1885,16 @@ static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) { return x86_64::X86_64ManagedRegister::FromXmmRegister(r); } -std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, - x86_64::X86_64Assembler* assembler) { +std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. // Two random spill regs. - std::vector<ManagedRegister> spill_regs; - spill_regs.push_back(ManagedFromCpu(x86_64::R10)); - spill_regs.push_back(ManagedFromCpu(x86_64::RSI)); + const ManagedRegister raw_spill_regs[] = { + ManagedFromCpu(x86_64::R10), + ManagedFromCpu(x86_64::RSI) + }; + ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs); // Three random entry spills. ManagedRegisterEntrySpills entry_spills; @@ -1534,18 +1928,20 @@ std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBU return str.str(); } -TEST_F(AssemblerX86_64Test, BuildFrame) { +TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) { DriverFn(&buildframe_test_fn, "BuildFrame"); } -std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, - x86_64::X86_64Assembler* assembler) { +std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. // Two random spill regs. 
- std::vector<ManagedRegister> spill_regs; - spill_regs.push_back(ManagedFromCpu(x86_64::R10)); - spill_regs.push_back(ManagedFromCpu(x86_64::RSI)); + const ManagedRegister raw_spill_regs[] = { + ManagedFromCpu(x86_64::R10), + ManagedFromCpu(x86_64::RSI) + }; + ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs); size_t frame_size = 10 * kStackAlignment; assembler->RemoveFrame(10 * kStackAlignment, spill_regs); @@ -1563,12 +1959,13 @@ std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIB return str.str(); } -TEST_F(AssemblerX86_64Test, RemoveFrame) { +TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) { DriverFn(&removeframe_test_fn, "RemoveFrame"); } -std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, - x86_64::X86_64Assembler* assembler) { +std::string increaseframe_test_fn( + JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + x86_64::X86_64JNIMacroAssembler* assembler) { assembler->IncreaseFrameSize(0U); assembler->IncreaseFrameSize(kStackAlignment); assembler->IncreaseFrameSize(10 * kStackAlignment); @@ -1582,12 +1979,13 @@ std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTR return str.str(); } -TEST_F(AssemblerX86_64Test, IncreaseFrame) { +TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) { DriverFn(&increaseframe_test_fn, "IncreaseFrame"); } -std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, - x86_64::X86_64Assembler* assembler) { +std::string decreaseframe_test_fn( + JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + x86_64::X86_64JNIMacroAssembler* assembler) { assembler->DecreaseFrameSize(0U); assembler->DecreaseFrameSize(kStackAlignment); assembler->DecreaseFrameSize(10 * kStackAlignment); @@ -1601,40 +1999,8 @@ std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTR return str.str(); } -TEST_F(AssemblerX86_64Test, DecreaseFrame) { +TEST_F(JNIMacroAssemblerX86_64Test, DecreaseFrame) { DriverFn(&decreaseframe_test_fn, "DecreaseFrame"); } -TEST_F(AssemblerX86_64Test, MovzxbRegs) { - DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb"); -} - -TEST_F(AssemblerX86_64Test, MovsxbRegs) { - DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); -} - -TEST_F(AssemblerX86_64Test, Repnescasw) { - GetAssembler()->repne_scasw(); - const char* expected = "repne scasw\n"; - DriverStr(expected, "Repnescasw"); -} - -TEST_F(AssemblerX86_64Test, Repecmpsw) { - GetAssembler()->repe_cmpsw(); - const char* expected = "repe cmpsw\n"; - DriverStr(expected, "Repecmpsw"); -} - -TEST_F(AssemblerX86_64Test, Repecmpsl) { - GetAssembler()->repe_cmpsl(); - const char* expected = "repe cmpsl\n"; - DriverStr(expected, "Repecmpsl"); -} - -TEST_F(AssemblerX86_64Test, Repecmpsq) { - GetAssembler()->repe_cmpsq(); - const char* expected = "repe cmpsq\n"; - DriverStr(expected, "Repecmpsq"); -} - } // namespace art diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 0c782d46cd..cc508a196b 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -29,15 +29,15 @@ namespace x86_64 { class CpuRegister { public: - explicit CpuRegister(Register r) : reg_(r) {} - explicit CpuRegister(int r) : reg_(Register(r)) {} - Register AsRegister() const { + explicit constexpr CpuRegister(Register r) : reg_(r) {} + explicit constexpr CpuRegister(int r) : 
reg_(Register(r)) {} + constexpr Register AsRegister() const { return reg_; } - uint8_t LowBits() const { + constexpr uint8_t LowBits() const { return reg_ & 7; } - bool NeedsRex() const { + constexpr bool NeedsRex() const { return reg_ > 7; } private: @@ -47,15 +47,15 @@ std::ostream& operator<<(std::ostream& os, const CpuRegister& reg); class XmmRegister { public: - explicit XmmRegister(FloatRegister r) : reg_(r) {} - explicit XmmRegister(int r) : reg_(FloatRegister(r)) {} - FloatRegister AsFloatRegister() const { + explicit constexpr XmmRegister(FloatRegister r) : reg_(r) {} + explicit constexpr XmmRegister(int r) : reg_(FloatRegister(r)) {} + constexpr FloatRegister AsFloatRegister() const { return reg_; } - uint8_t LowBits() const { + constexpr uint8_t LowBits() const { return reg_ & 7; } - bool NeedsRex() const { + constexpr bool NeedsRex() const { return reg_ > 7; } private: @@ -106,6 +106,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven }; diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc new file mode 100644 index 0000000000..ec86254cfc --- /dev/null +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -0,0 +1,651 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni_macro_assembler_x86_64.h" + +#include "base/casts.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "memory_region.h" +#include "thread.h" + +namespace art { +namespace x86_64 { + +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + +constexpr size_t kFramePointerSize = 8; + +#define __ asm_. + +void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> spill_regs, + const ManagedRegisterEntrySpills& entry_spills) { + DCHECK_EQ(CodeSize(), 0U); // Nothing emitted yet. + cfi().SetCurrentCFAOffset(8); // Return address on stack. + CHECK_ALIGNED(frame_size, kStackAlignment); + int gpr_count = 0; + for (int i = spill_regs.size() - 1; i >= 0; --i) { + x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64(); + if (spill.IsCpuRegister()) { + __ pushq(spill.AsCpuRegister()); + gpr_count++; + cfi().AdjustCFAOffset(kFramePointerSize); + cfi().RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0); + } + } + // return address then method on stack. 
+ int64_t rest_of_frame = static_cast<int64_t>(frame_size) + - (gpr_count * kFramePointerSize) + - kFramePointerSize /*return address*/; + __ subq(CpuRegister(RSP), Immediate(rest_of_frame)); + cfi().AdjustCFAOffset(rest_of_frame); + + // spill xmms + int64_t offset = rest_of_frame; + for (int i = spill_regs.size() - 1; i >= 0; --i) { + x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64(); + if (spill.IsXmmRegister()) { + offset -= sizeof(double); + __ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + cfi().RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset); + } + } + + static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize, + "Unexpected frame pointer size."); + + __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister()); + + for (size_t i = 0; i < entry_spills.size(); ++i) { + ManagedRegisterSpill spill = entry_spills.at(i); + if (spill.AsX86_64().IsCpuRegister()) { + if (spill.getSize() == 8) { + __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), + spill.AsX86_64().AsCpuRegister()); + } else { + CHECK_EQ(spill.getSize(), 4); + __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), + spill.AsX86_64().AsCpuRegister()); + } + } else { + if (spill.getSize() == 8) { + __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), + spill.AsX86_64().AsXmmRegister()); + } else { + CHECK_EQ(spill.getSize(), 4); + __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), + spill.AsX86_64().AsXmmRegister()); + } + } + } +} + +void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> spill_regs) { + CHECK_ALIGNED(frame_size, kStackAlignment); + cfi().RememberState(); + int gpr_count = 0; + // unspill xmms + int64_t offset = static_cast<int64_t>(frame_size) + - (spill_regs.size() * kFramePointerSize) + - 2 * kFramePointerSize; + for (size_t i = 0; i < spill_regs.size(); ++i) { + x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64(); + if (spill.IsXmmRegister()) { + offset += sizeof(double); + __ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + cfi().Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister())); + } else { + gpr_count++; + } + } + int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize; + __ addq(CpuRegister(RSP), Immediate(adjust)); + cfi().AdjustCFAOffset(-adjust); + for (size_t i = 0; i < spill_regs.size(); ++i) { + x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64(); + if (spill.IsCpuRegister()) { + __ popq(spill.AsCpuRegister()); + cfi().AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi().Restore(DWARFReg(spill.AsCpuRegister().AsRegister())); + } + } + __ ret(); + // The CFI should be restored for any code that follows the exit block. 
+ cfi().RestoreState(); + cfi().DefCFAOffset(frame_size); +} + +void X86_64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + CHECK_ALIGNED(adjust, kStackAlignment); + __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); + cfi().AdjustCFAOffset(adjust); +} + +static void DecreaseFrameSizeImpl(size_t adjust, X86_64Assembler* assembler) { + CHECK_ALIGNED(adjust, kStackAlignment); + assembler->addq(CpuRegister(RSP), Immediate(adjust)); + assembler->cfi().AdjustCFAOffset(-adjust); +} + +void X86_64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + DecreaseFrameSizeImpl(adjust, &asm_); +} + +void X86_64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { + X86_64ManagedRegister src = msrc.AsX86_64(); + if (src.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (src.IsCpuRegister()) { + if (size == 4) { + CHECK_EQ(4u, size); + __ movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister()); + } else { + CHECK_EQ(8u, size); + __ movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister()); + } + } else if (src.IsRegisterPair()) { + CHECK_EQ(0u, size); + __ movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow()); + __ movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)), + src.AsRegisterPairHigh()); + } else if (src.IsX87Register()) { + if (size == 4) { + __ fstps(Address(CpuRegister(RSP), offs)); + } else { + __ fstpl(Address(CpuRegister(RSP), offs)); + } + } else { + CHECK(src.IsXmmRegister()); + if (size == 4) { + __ movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister()); + } else { + __ movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister()); + } + } +} + +void X86_64JNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { + X86_64ManagedRegister src = msrc.AsX86_64(); + CHECK(src.IsCpuRegister()); + __ movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); +} + +void X86_64JNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { + X86_64ManagedRegister src = msrc.AsX86_64(); + CHECK(src.IsCpuRegister()); + __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister()); +} + +void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, + uint32_t imm, + ManagedRegister) { + __ movl(Address(CpuRegister(RSP), dest), Immediate(imm)); // TODO(64) movq? 
+} + +void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + CHECK(scratch.IsCpuRegister()); + __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs)); + __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister()); +} + +void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) { + __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP)); +} + +void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/, + ManagedRegister /*src*/, + FrameOffset /*in_off*/, + ManagedRegister /*scratch*/) { + UNIMPLEMENTED(FATAL); // this case only currently exists for ARM +} + +void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (dest.IsCpuRegister()) { + if (size == 4) { + CHECK_EQ(4u, size); + __ movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); + } else { + CHECK_EQ(8u, size); + __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); + } + } else if (dest.IsRegisterPair()) { + CHECK_EQ(0u, size); + __ movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src)); + __ movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4))); + } else if (dest.IsX87Register()) { + if (size == 4) { + __ flds(Address(CpuRegister(RSP), src)); + } else { + __ fldl(Address(CpuRegister(RSP), src)); + } + } else { + CHECK(dest.IsXmmRegister()); + if (size == 4) { + __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src)); + } else { + __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src)); + } + } +} + +void X86_64JNIMacroAssembler::LoadFromThread(ManagedRegister mdest, + ThreadOffset64 src, size_t size) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + if (dest.IsNoRegister()) { + CHECK_EQ(0u, size); + } else if (dest.IsCpuRegister()) { + if (size == 1u) { + __ gs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src, true)); + } else { + CHECK_EQ(4u, size); + __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true)); + } + } else if (dest.IsRegisterPair()) { + CHECK_EQ(8u, size); + __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true)); + } else if (dest.IsX87Register()) { + if (size == 4) { + __ gs()->flds(Address::Absolute(src, true)); + } else { + __ gs()->fldl(Address::Absolute(src, true)); + } + } else { + CHECK(dest.IsXmmRegister()); + if (size == 4) { + __ gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true)); + } else { + __ gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true)); + } + } +} + +void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + CHECK(dest.IsCpuRegister()); + __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src)); +} + +void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, + ManagedRegister mbase, + MemberOffset offs, + bool unpoison_reference) { + X86_64ManagedRegister base = mbase.AsX86_64(); + X86_64ManagedRegister dest = mdest.AsX86_64(); + CHECK(base.IsCpuRegister()); + CHECK(dest.IsCpuRegister()); + __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); + if (unpoison_reference) { + __ MaybeUnpoisonHeapReference(dest.AsCpuRegister()); + } +} + +void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, + ManagedRegister mbase, + Offset 
offs) { + X86_64ManagedRegister base = mbase.AsX86_64(); + X86_64ManagedRegister dest = mdest.AsX86_64(); + CHECK(base.IsCpuRegister()); + CHECK(dest.IsCpuRegister()); + __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); +} + +void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + CHECK(dest.IsCpuRegister()); + __ gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true)); +} + +void X86_64JNIMacroAssembler::SignExtend(ManagedRegister mreg, size_t size) { + X86_64ManagedRegister reg = mreg.AsX86_64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsCpuRegister()) << reg; + if (size == 1) { + __ movsxb(reg.AsCpuRegister(), reg.AsCpuRegister()); + } else { + __ movsxw(reg.AsCpuRegister(), reg.AsCpuRegister()); + } +} + +void X86_64JNIMacroAssembler::ZeroExtend(ManagedRegister mreg, size_t size) { + X86_64ManagedRegister reg = mreg.AsX86_64(); + CHECK(size == 1 || size == 2) << size; + CHECK(reg.IsCpuRegister()) << reg; + if (size == 1) { + __ movzxb(reg.AsCpuRegister(), reg.AsCpuRegister()); + } else { + __ movzxw(reg.AsCpuRegister(), reg.AsCpuRegister()); + } +} + +void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { + X86_64ManagedRegister dest = mdest.AsX86_64(); + X86_64ManagedRegister src = msrc.AsX86_64(); + if (!dest.Equals(src)) { + if (dest.IsCpuRegister() && src.IsCpuRegister()) { + __ movq(dest.AsCpuRegister(), src.AsCpuRegister()); + } else if (src.IsX87Register() && dest.IsXmmRegister()) { + // Pass via stack and pop X87 register + __ subl(CpuRegister(RSP), Immediate(16)); + if (size == 4) { + CHECK_EQ(src.AsX87Register(), ST0); + __ fstps(Address(CpuRegister(RSP), 0)); + __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0)); + } else { + CHECK_EQ(src.AsX87Register(), ST0); + __ fstpl(Address(CpuRegister(RSP), 0)); + __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), 0)); + } + __ addq(CpuRegister(RSP), Immediate(16)); + } else { + // TODO: x87, SSE + UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src; + } + } +} + +void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + CHECK(scratch.IsCpuRegister()); + __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src)); + __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister()); +} + +void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, + ManagedRegister mscratch) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + CHECK(scratch.IsCpuRegister()); + __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true)); + Store(fr_offs, scratch, 8); +} + +void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister mscratch) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + CHECK(scratch.IsCpuRegister()); + Load(scratch, fr_offs, 8); + __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister()); +} + +void X86_64JNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src, + ManagedRegister mscratch, + size_t size) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + if (scratch.IsCpuRegister() && size == 8) { + Load(scratch, src, 4); + Store(dest, scratch, 4); + Load(scratch, FrameOffset(src.Int32Value() + 4), 4); + Store(FrameOffset(dest.Int32Value() + 4), scratch, 4); + } else { + Load(scratch, src, 
size); + Store(dest, scratch, size); + } +} + +void X86_64JNIMacroAssembler::Copy(FrameOffset /*dst*/, + ManagedRegister /*src_base*/, + Offset /*src_offset*/, + ManagedRegister /*scratch*/, + size_t /*size*/) { + UNIMPLEMENTED(FATAL); +} + +void X86_64JNIMacroAssembler::Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) { + CHECK(scratch.IsNoRegister()); + CHECK_EQ(size, 4u); + __ pushq(Address(CpuRegister(RSP), src)); + __ popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset)); +} + +void X86_64JNIMacroAssembler::Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); + CHECK_EQ(size, 4u); + __ movq(scratch, Address(CpuRegister(RSP), src_base)); + __ movq(scratch, Address(scratch, src_offset)); + __ movq(Address(CpuRegister(RSP), dest), scratch); +} + +void X86_64JNIMacroAssembler::Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) { + CHECK_EQ(size, 4u); + CHECK(scratch.IsNoRegister()); + __ pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset)); + __ popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset)); +} + +void X86_64JNIMacroAssembler::Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister mscratch, + size_t size) { + CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); + CHECK_EQ(size, 4u); + CHECK_EQ(dest.Int32Value(), src.Int32Value()); + __ movq(scratch, Address(CpuRegister(RSP), src)); + __ pushq(Address(scratch, src_offset)); + __ popq(Address(scratch, dest_offset)); +} + +void X86_64JNIMacroAssembler::MemoryBarrier(ManagedRegister) { + __ mfence(); +} + +void X86_64JNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, + FrameOffset handle_scope_offset, + ManagedRegister min_reg, + bool null_allowed) { + X86_64ManagedRegister out_reg = mout_reg.AsX86_64(); + X86_64ManagedRegister in_reg = min_reg.AsX86_64(); + if (in_reg.IsNoRegister()) { // TODO(64): && null_allowed + // Use out_reg as indicator of null. 
+ in_reg = out_reg; + // TODO: movzwl + __ movl(in_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + } + CHECK(in_reg.IsCpuRegister()); + CHECK(out_reg.IsCpuRegister()); + VerifyObject(in_reg, null_allowed); + if (null_allowed) { + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); + } + __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); + __ j(kZero, &null_arg); + __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + __ Bind(&null_arg); + } else { + __ leaq(out_reg.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + } +} + +void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handle_scope_offset, + ManagedRegister mscratch, + bool null_allowed) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + CHECK(scratch.IsCpuRegister()); + if (null_allowed) { + Label null_arg; + __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister()); + __ j(kZero, &null_arg); + __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + __ Bind(&null_arg); + } else { + __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset)); + } + Store(out_off, scratch, 8); +} + +// Given a handle scope entry, load the associated reference. +void X86_64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, + ManagedRegister min_reg) { + X86_64ManagedRegister out_reg = mout_reg.AsX86_64(); + X86_64ManagedRegister in_reg = min_reg.AsX86_64(); + CHECK(out_reg.IsCpuRegister()); + CHECK(in_reg.IsCpuRegister()); + Label null_arg; + if (!out_reg.Equals(in_reg)) { + __ xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister()); + } + __ testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister()); + __ j(kZero, &null_arg); + __ movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0)); + __ Bind(&null_arg); +} + +void X86_64JNIMacroAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { + // TODO: not validating references +} + +void X86_64JNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { + // TODO: not validating references +} + +void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) { + X86_64ManagedRegister base = mbase.AsX86_64(); + CHECK(base.IsCpuRegister()); + __ call(Address(base.AsCpuRegister(), offset.Int32Value())); + // TODO: place reference map on call +} + +void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { + CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister(); + __ movq(scratch, Address(CpuRegister(RSP), base)); + __ call(Address(scratch, offset)); +} + +void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) { + __ gs()->call(Address::Absolute(offset, true)); +} + +void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) { + __ gs()->movq(tr.AsX86_64().AsCpuRegister(), + Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true)); +} + +void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) { + X86_64ManagedRegister scratch = mscratch.AsX86_64(); + __ gs()->movq(scratch.AsCpuRegister(), + Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true)); + __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister()); +} + +// Slowpath entered when 
Thread::Current()->_exception is non-null +class X86_64ExceptionSlowPath FINAL : public SlowPath { + public: + explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} + virtual void Emit(Assembler *sp_asm) OVERRIDE; + private: + const size_t stack_adjust_; +}; + +void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) { + X86_64ExceptionSlowPath* slow = new (__ GetArena()) X86_64ExceptionSlowPath(stack_adjust); + __ GetBuffer()->EnqueueSlowPath(slow); + __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true), Immediate(0)); + __ j(kNotEqual, slow->Entry()); +} + +std::unique_ptr<JNIMacroLabel> X86_64JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new X86_64JNIMacroLabel()); +} + +void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ jmp(X86_64JNIMacroLabel::Cast(label)->AsX86_64()); +} + +void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label, + JNIMacroUnaryCondition condition, + ManagedRegister test) { + CHECK(label != nullptr); + + art::x86_64::Condition x86_64_cond; + switch (condition) { + case JNIMacroUnaryCondition::kZero: + x86_64_cond = art::x86_64::kZero; + break; + case JNIMacroUnaryCondition::kNotZero: + x86_64_cond = art::x86_64::kNotZero; + break; + default: + LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition); + UNREACHABLE(); + } + + // TEST reg, reg + // Jcc <Offset> + __ testq(test.AsX86_64().AsCpuRegister(), test.AsX86_64().AsCpuRegister()); + __ j(x86_64_cond, X86_64JNIMacroLabel::Cast(label)->AsX86_64()); +} + +void X86_64JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(X86_64JNIMacroLabel::Cast(label)->AsX86_64()); +} + +#undef __ + +void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { + X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm); +#define __ sp_asm-> + __ Bind(&entry_); + // Note: the return value is dead + if (stack_adjust_ != 0) { // Fix up the frame. + DecreaseFrameSizeImpl(stack_adjust_, sp_asm); + } + // Pass exception as argument in RDI + __ gs()->movq(CpuRegister(RDI), + Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true)); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, pDeliverException), true)); + // this call should never return + __ int3(); +#undef __ +} + +} // namespace x86_64 +} // namespace art diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h new file mode 100644 index 0000000000..aa058f7454 --- /dev/null +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_ +#define ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_ + +#include <vector> + +#include "assembler_x86_64.h" +#include "base/arena_containers.h" +#include "base/array_ref.h" +#include "base/enums.h" +#include "base/macros.h" +#include "offsets.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +namespace art { +namespace x86_64 { + +class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assembler, + PointerSize::k64> { + public: + explicit X86_64JNIMacroAssembler(ArenaAllocator* arena) + : JNIMacroAssemblerFwd<X86_64Assembler, PointerSize::k64>(arena) {} + virtual ~X86_64JNIMacroAssembler() {} + + // + // Overridden common assembler high-level functionality + // + + // Emit code that will create an activation on the stack + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs, + const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + + // Emit code that will remove an activation from the stack + void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) + OVERRIDE; + + void IncreaseFrameSize(size_t adjust) OVERRIDE; + void DecreaseFrameSize(size_t adjust) OVERRIDE; + + // Store routines + void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; + void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + + void StoreStackOffsetToThread(ThreadOffset64 thr_offs, + FrameOffset fr_offs, + ManagedRegister scratch) OVERRIDE; + + void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + + void StoreSpanning(FrameOffset dest, + ManagedRegister src, + FrameOffset in_off, + ManagedRegister scratch) OVERRIDE; + + // Load routines + void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + + void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; + + void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + + void LoadRef(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs, + bool unpoison_reference) OVERRIDE; + + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; + + // Copying routines + void Move(ManagedRegister dest, ManagedRegister src, size_t size); + + void CopyRawPtrFromThread(FrameOffset fr_offs, + ThreadOffset64 thr_offs, + ManagedRegister scratch) OVERRIDE; + + void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) + OVERRIDE; + + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + + void Copy(FrameOffset dest, + ManagedRegister src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest_base, + Offset dest_offset, + FrameOffset src, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(FrameOffset dest, + FrameOffset src_base, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void Copy(ManagedRegister dest, + Offset dest_offset, + ManagedRegister src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void 
Copy(FrameOffset dest, + Offset dest_offset, + FrameOffset src, + Offset src_offset, + ManagedRegister scratch, + size_t size) OVERRIDE; + + void MemoryBarrier(ManagedRegister) OVERRIDE; + + // Sign extension + void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Zero extension + void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + + // Exploit fast access in managed code to Thread::Current() + void GetCurrentThread(ManagedRegister tr) OVERRIDE; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + + // Set up out_reg to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. in_reg holds a possibly stale reference + // that can be used to avoid loading the handle scope entry to see if the value is + // null. + void CreateHandleScopeEntry(ManagedRegister out_reg, + FrameOffset handlescope_offset, + ManagedRegister in_reg, + bool null_allowed) OVERRIDE; + + // Set up out_off to hold a Object** into the handle scope, or to be null if the + // value is null and null_allowed. + void CreateHandleScopeEntry(FrameOffset out_off, + FrameOffset handlescope_offset, + ManagedRegister scratch, + bool null_allowed) OVERRIDE; + + // src holds a handle scope entry (Object**) load this into dst + virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; + void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + + // Call to address held at [base+offset] + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; + void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to a ExceptionSlowPath if it is. + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) OVERRIDE; + // Emit a conditional jump to the label by applying a unary condition test to the register. + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + // Code at this offset will serve as the target for the Jump call. + void Bind(JNIMacroLabel* label) OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler); +}; + +class X86_64JNIMacroLabel FINAL + : public JNIMacroLabelCommon<X86_64JNIMacroLabel, + art::Label, + kX86_64> { + public: + art::Label* AsX86_64() { + return AsPlatformLabel(); + } +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_UTILS_X86_64_JNI_MACRO_ASSEMBLER_X86_64_H_ diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index c4228c1139..32af672670 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -88,52 +88,52 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. 
class X86_64ManagedRegister : public ManagedRegister { public: - CpuRegister AsCpuRegister() const { + constexpr CpuRegister AsCpuRegister() const { CHECK(IsCpuRegister()); return CpuRegister(static_cast<Register>(id_)); } - XmmRegister AsXmmRegister() const { + constexpr XmmRegister AsXmmRegister() const { CHECK(IsXmmRegister()); return XmmRegister(static_cast<FloatRegister>(id_ - kNumberOfCpuRegIds)); } - X87Register AsX87Register() const { + constexpr X87Register AsX87Register() const { CHECK(IsX87Register()); return static_cast<X87Register>(id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds)); } - CpuRegister AsRegisterPairLow() const { + constexpr CpuRegister AsRegisterPairLow() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdLow(). return FromRegId(AllocIdLow()).AsCpuRegister(); } - CpuRegister AsRegisterPairHigh() const { + constexpr CpuRegister AsRegisterPairHigh() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdHigh(). return FromRegId(AllocIdHigh()).AsCpuRegister(); } - bool IsCpuRegister() const { + constexpr bool IsCpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfCpuRegIds); } - bool IsXmmRegister() const { + constexpr bool IsXmmRegister() const { CHECK(IsValidManagedRegister()); const int test = id_ - kNumberOfCpuRegIds; return (0 <= test) && (test < kNumberOfXmmRegIds); } - bool IsX87Register() const { + constexpr bool IsX87Register() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds); return (0 <= test) && (test < kNumberOfX87RegIds); } - bool IsRegisterPair() const { + constexpr bool IsRegisterPair() const { CHECK(IsValidManagedRegister()); const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds); @@ -147,32 +147,32 @@ class X86_64ManagedRegister : public ManagedRegister { // then false is returned. 
bool Overlaps(const X86_64ManagedRegister& other) const; - static X86_64ManagedRegister FromCpuRegister(Register r) { + static constexpr X86_64ManagedRegister FromCpuRegister(Register r) { CHECK_NE(r, kNoRegister); return FromRegId(r); } - static X86_64ManagedRegister FromXmmRegister(FloatRegister r) { + static constexpr X86_64ManagedRegister FromXmmRegister(FloatRegister r) { return FromRegId(r + kNumberOfCpuRegIds); } - static X86_64ManagedRegister FromX87Register(X87Register r) { + static constexpr X86_64ManagedRegister FromX87Register(X87Register r) { CHECK_NE(r, kNoX87Register); return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds); } - static X86_64ManagedRegister FromRegisterPair(RegisterPair r) { + static constexpr X86_64ManagedRegister FromRegisterPair(RegisterPair r) { CHECK_NE(r, kNoRegisterPair); return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds)); } private: - bool IsValidManagedRegister() const { + constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); } - int RegId() const { + constexpr int RegId() const { CHECK(!IsNoRegister()); return id_; } @@ -188,9 +188,9 @@ class X86_64ManagedRegister : public ManagedRegister { friend class ManagedRegister; - explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} + explicit constexpr X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {} - static X86_64ManagedRegister FromRegId(int reg_id) { + static constexpr X86_64ManagedRegister FromRegId(int reg_id) { X86_64ManagedRegister reg(reg_id); CHECK(reg.IsValidManagedRegister()); return reg; @@ -201,7 +201,7 @@ std::ostream& operator<<(std::ostream& os, const X86_64ManagedRegister& reg); } // namespace x86_64 -inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const { +constexpr inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const { x86_64::X86_64ManagedRegister reg(id_); CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister()); return reg;
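Editor's note: the managed_register_x86_64.h and constants_x86_64.h hunks above only add constexpr qualifiers; behaviour is unchanged, but register wrappers can now be evaluated at compile time. The following is a minimal standalone sketch of that pattern, not code from the commit: it uses simplified stand-in types instead of the real art::x86_64 classes, and every name in it (the enum values, kStackPointer, kScratch) is an assumption chosen for illustration.

#include <cstdint>

// Simplified stand-in for art::x86_64::Register (values match the hardware encoding).
enum Register { RAX = 0, RCX = 1, RSP = 4, R10 = 10, R15 = 15 };

// Simplified stand-in for art::x86_64::CpuRegister, mirroring the now-constexpr API.
class CpuRegister {
 public:
  explicit constexpr CpuRegister(Register r) : reg_(r) {}
  constexpr Register AsRegister() const { return reg_; }
  constexpr uint8_t LowBits() const { return reg_ & 7; }  // low three encoding bits
  constexpr bool NeedsRex() const { return reg_ > 7; }    // R8..R15 need a REX prefix
 private:
  const Register reg_;
};

// Because constructor and accessors are constexpr, register constants can be
// formed and checked entirely at compile time, e.g. in static spill tables.
constexpr CpuRegister kStackPointer(RSP);
constexpr CpuRegister kScratch(R10);

static_assert(!kStackPointer.NeedsRex(), "RSP encodes without a REX prefix");
static_assert(kScratch.NeedsRex(), "R10 needs REX.B");
static_assert(kScratch.LowBits() == 2, "R10 encodes as 0b010 plus REX.B");

int main() { return kStackPointer.LowBits() == 4 ? 0 : 1; }

With the real classes, the same idea presumably lets callee-save register lists and similar tables in the code generators be built as compile-time constants rather than at runtime.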