22 files changed, 1962 insertions, 854 deletions
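The change that repeats across the call sites in this diff is a new finalization contract: code that used to call EmitSlowPaths() (or nothing) before FinalizeInstructions() now has to call FinalizeCode() first, so the Thumb2 assembler can resolve its branch/literal Fixups and emit the literal pool. Below is a minimal caller-side sketch of that order, using the assembler API names that appear in this diff; the include paths, the wrapper function and the literal value are illustrative assumptions.

#include <cstdint>
#include <vector>

#include "memory_region.h"               // Assumed include path for art::MemoryRegion.
#include "utils/arm/assembler_thumb2.h"  // Path as used by trampoline_compiler.cc below.

std::vector<uint8_t> AssembleExample() {  // Hypothetical helper, not part of the patch.
  using namespace art::arm;
  Thumb2Assembler assembler;

  // Emit a PC-relative literal load; the Fixup machinery later picks the final
  // encoding (16-bit LDR literal, 32-bit LDR literal, MOVW+ADD+LDR, ...).
  Literal* lit = assembler.NewLiteral<int32_t>(0x12345678);
  assembler.LoadLiteral(R0, lit);
  assembler.bkpt(0);

  // New contract: resolve fixups and emit the literal pool before measuring
  // or copying the code (this replaces the removed EmitSlowPaths() calls).
  assembler.FinalizeCode();

  std::vector<uint8_t> code(assembler.CodeSize());
  art::MemoryRegion region(code.data(), code.size());
  assembler.FinalizeInstructions(region);
  return code;
}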
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 3a0d520e47..016f28ef1e 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -56,7 +56,7 @@ class JNICFITest : public CFITest { jni_asm->IncreaseFrameSize(32); jni_asm->DecreaseFrameSize(32); jni_asm->RemoveFrame(frame_size, callee_save_regs); - jni_asm->EmitSlowPaths(); + jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); jni_asm->FinalizeInstructions(code); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 4d7d86cce6..85fd6962fa 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -474,7 +474,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation - __ EmitSlowPaths(); + __ FinalizeCode(); size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index d0104300d3..a3e889f0f6 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -82,6 +82,7 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { arm::kLoadWord, arm::PC, arm::R0, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); assembler.bkpt(0); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index ee48789ad2..29355d6968 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -233,7 +233,7 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() { kArm64PointerSize).Int32Value()); assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); // Ensure we emit the literal pool. - assembler.EmitSlowPaths(); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index e6b1f7c6aa..a82b08afe9 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -358,6 +358,7 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + stack_map_stream_(graph->GetArena()), is_baseline_(false), graph_(graph), compiler_options_(compiler_options), @@ -365,8 +366,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), - requires_current_method_(false), - stack_map_stream_(graph->GetArena()) {} + requires_current_method_(false) {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -436,6 +436,8 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + StackMapStream stack_map_stream_; + // Whether we are using baseline. 
bool is_baseline_; @@ -464,8 +466,6 @@ class CodeGenerator { // Whether an instruction in the graph accesses the current method. bool requires_current_method_; - StackMapStream stack_map_stream_; - friend class OptimizingCFITest; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3d3e35d0fc..d14594562e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -392,12 +392,26 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(false /* can_relocate_branches */), + assembler_(), isa_features_(isa_features) { // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } +void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + CodeGenerator::Finalize(allocator); +} + Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { switch (type) { case Primitive::kPrimLong: { @@ -2831,7 +2845,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - NearLabel less, greater, done; + Label less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2927,7 +2941,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register temp1, Register temp2, HInstruction* instruction) { - NearLabel fail; + Label fail; if (offset != 0) { __ LoadImmediate(temp1, offset); __ add(IP, addr, ShifterOperand(temp1)); @@ -3607,7 +3621,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp, Register object, Register value, bool can_be_null) { - NearLabel is_null; + Label is_null; if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } @@ -4036,7 +4050,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { Register cls = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - NearLabel done, zero; + Label done, zero; SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -4093,19 +4107,15 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - NearLabel done; // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, slow_path->GetExitLabel()); } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); __ b(slow_path->GetEntryLabel(), NE); __ Bind(slow_path->GetExitLabel()); - if (instruction->MustDoNullCheck()) { - __ Bind(&done); - } } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 824e48cc9f..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -298,6 +298,8 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + void Finalize(CodeAllocator* allocator) OVERRIDE; + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index bc3653d7ea..550ed70e0f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -134,6 +134,11 @@ class StackMapStream : public ValueObject { return stack_maps_.GetRawStorage()[i]; } + void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { + DCHECK_LT(i, stack_maps_.Size()); + stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + } + uint32_t ComputeMaxNativePcOffset() const; // Prepares the stream to fill in a memory region. Must be called before FillIn. diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index cb51ed8fc8..facc6304e5 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,21 +17,21 @@ #include "trampoline_compiler.h" #include "jni_env_ext.h" -#include "utils/arm/assembler_arm.h" +#include "utils/arm/assembler_thumb2.h" #include "utils/arm64/assembler_arm64.h" #include "utils/mips/assembler_mips.h" #include "utils/mips64/assembler_mips64.h" #include "utils/x86/assembler_x86.h" #include "utils/x86_64/assembler_x86_64.h" -#define __ assembler-> +#define __ assembler. namespace art { namespace arm { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2))); + Thumb2Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. @@ -46,10 +46,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention } __ bkpt(0); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -58,7 +59,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace arm64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64))); + Arm64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI. 
@@ -82,11 +83,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention break; } - assembler->EmitSlowPaths(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -95,7 +96,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips))); + MipsAssembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -112,10 +113,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -124,7 +126,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64))); + Mips64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -141,10 +143,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -152,16 +155,17 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace x86 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { - std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86))); + X86Assembler assembler; // All x86 trampolines call via the Thread* held in fs. __ fs()->jmp(Address::Absolute(offset)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -169,17 +173,17 @@ static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { namespace x86_64 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) { - std::unique_ptr<x86_64::X86_64Assembler> - assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64))); + x86_64::X86_64Assembler assembler; // All x86 trampolines call via the Thread* held in gs. 
__ gs()->jmp(x86_64::Address::Absolute(offset, true)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 350efca3e2..3458a448bc 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ +#include <type_traits> #include <vector> #include "base/bit_utils.h" @@ -33,14 +34,47 @@ namespace arm { class Arm32Assembler; class Thumb2Assembler; -// This class indicates that the label and its uses -// will fall into a range that is encodable in 16bits on thumb2. -class NearLabel : public Label { +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { public: - NearLabel() {} + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + Label* GetLabel() { + return &label_; + } + + const Label* GetLabel() const { + return &label_; + } private: - DISALLOW_COPY_AND_ASSIGN(NearLabel); + Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); }; class ShifterOperand { @@ -529,9 +563,6 @@ class ArmAssembler : public Assembler { // Branch instructions. virtual void b(Label* label, Condition cond = AL) = 0; - virtual void b(NearLabel* label, Condition cond = AL) { - b(static_cast<Label*>(label), cond); - } virtual void bl(Label* label, Condition cond = AL) = 0; virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; @@ -541,9 +572,31 @@ class ArmAssembler : public Assembler { void Pad(uint32_t bytes); + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + virtual uint32_t GetAdjustedPosition(uint32_t old_position) = 0; + // Macros. // Most of these are pure virtual as they need to be implemented per instruction set. + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. In the absence of + // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>. + template <typename T> + Literal* NewLiteral(typename std::decay<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + virtual Literal* NewLiteral(size_t size, const uint8_t* data) = 0; + + // Load literal. + virtual void LoadLiteral(Register rt, Literal* literal) = 0; + virtual void LoadLiteral(Register rt, Register rt2, Literal* literal) = 0; + virtual void LoadLiteral(SRegister sd, Literal* literal) = 0; + virtual void LoadLiteral(DRegister dd, Literal* literal) = 0; + // Add signed constant value to rd. 
May clobber IP. virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0; virtual void AddConstant(Register rd, Register rn, int32_t value, @@ -667,9 +720,6 @@ class ArmAssembler : public Assembler { virtual void Bind(Label* label) = 0; virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; - virtual void CompareAndBranchIfZero(Register r, NearLabel* label) { - CompareAndBranchIfZero(r, static_cast<Label*>(label)); - } virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; // diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index cdf62bf885..6e60ddc260 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1354,6 +1354,41 @@ int Arm32Assembler::DecodeBranchOffset(int32_t inst) { } +uint32_t Arm32Assembler::GetAdjustedPosition(uint32_t old_position ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +Literal* Arm32Assembler::NewLiteral(size_t size ATTRIBUTE_UNUSED, + const uint8_t* data ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, Register rt2 ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(SRegister sd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(DRegister dd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); } diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 3164623fd9..1c38eec12c 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -238,7 +238,16 @@ class Arm32Assembler FINAL : public ArmAssembler { // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 26cb6c3739..f9e1ac672e 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,6 +25,309 @@ namespace art { namespace arm { +void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { + CHECK(!label->IsBound()); + + while (label->IsLinked()) { + FixupId fixup_id = label->Position(); // The id for linked Fixup. + Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id. 
+ fixup->Resolve(bound_pc); // Fixup can be resolved now. + // Add this fixup as a dependency of all later fixups. + for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) { + GetFixup(id)->AddDependent(fixup_id); + } + uint32_t fixup_location = fixup->GetLocation(); + uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain. + buffer_.Store<int16_t>(fixup_location, 0); + label->position_ = next; // Move to next. + } + label->BindTo(bound_pc); +} + +void Thumb2Assembler::BindLiterals() { + // We don't add the padding here, that's done only after adjusting the Fixup sizes. + uint32_t code_size = buffer_.Size(); + for (Literal& lit : literals_) { + Label* label = lit.GetLabel(); + BindLabel(label, code_size); + code_size += lit.GetSize(); + } +} + +void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate) { + uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size); + if (adjustment != 0u) { + *current_code_size += adjustment; + for (FixupId dependent_id : fixup->Dependents()) { + Fixup* dependent = GetFixup(dependent_id); + dependent->IncreaseAdjustment(adjustment); + if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) { + buffer_.Store<int16_t>(dependent->GetLocation(), 1); + fixups_to_recalculate->push_back(dependent_id); + } + } + } +} + +uint32_t Thumb2Assembler::AdjustFixups() { + uint32_t current_code_size = buffer_.Size(); + std::deque<FixupId> fixups_to_recalculate; + if (kIsDebugBuild) { + // We will use the placeholders in the buffer_ to mark whether the fixup has + // been added to the fixups_to_recalculate. Make sure we start with zeros. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + for (Fixup& fixup : fixups_) { + AdjustFixupIfNeeded(&fixup, ¤t_code_size, &fixups_to_recalculate); + } + while (!fixups_to_recalculate.empty()) { + // Pop the fixup. + FixupId fixup_id = fixups_to_recalculate.front(); + fixups_to_recalculate.pop_front(); + Fixup* fixup = GetFixup(fixup_id); + DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); + buffer_.Store<int16_t>(fixup->GetLocation(), 0); + // See if it needs adjustment. + AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); + } + if (kIsDebugBuild) { + // Check that no fixup is marked as being in fixups_to_recalculate anymore. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + + // Adjust literal pool labels for padding. + DCHECK_EQ(current_code_size & 1u, 0u); + uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size(); + if (literals_adjustment != 0u) { + for (Literal& literal : literals_) { + Label* label = literal.GetLabel(); + DCHECK(label->IsBound()); + int old_position = label->Position(); + label->Reinitialize(); + label->BindTo(old_position + literals_adjustment); + } + } + + return current_code_size; +} + +void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { + // Move non-fixup code to its final place and emit fixups. + // Process fixups in reverse order so that we don't repeatedly move the same data. + size_t src_end = buffer_.Size(); + size_t dest_end = adjusted_code_size; + buffer_.Resize(dest_end); + DCHECK_GE(dest_end, src_end); + for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { + Fixup* fixup = &*i; + if (fixup->GetOriginalSize() == fixup->GetSize()) { + // The size of this Fixup didn't change. 
To avoid moving the data + // in small chunks, emit the code to its original position. + fixup->Emit(&buffer_, adjusted_code_size); + fixup->Finalize(dest_end - src_end); + } else { + // Move the data between the end of the fixup and src_end to its final location. + size_t old_fixup_location = fixup->GetLocation(); + size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); + size_t data_size = src_end - src_begin; + size_t dest_begin = dest_end - data_size; + buffer_.Move(dest_begin, src_begin, data_size); + src_end = old_fixup_location; + dest_end = dest_begin - fixup->GetSizeInBytes(); + // Finalize the Fixup and emit the data to the new location. + fixup->Finalize(dest_end - src_end); + fixup->Emit(&buffer_, adjusted_code_size); + } + } + CHECK_EQ(src_end, dest_end); +} + +void Thumb2Assembler::EmitLiterals() { + if (!literals_.empty()) { + // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment. + // We don't support byte and half-word literals. + uint32_t code_size = buffer_.Size(); + DCHECK_EQ(code_size & 1u, 0u); + if ((code_size & 2u) != 0u) { + Emit16(0); + } + for (Literal& literal : literals_) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(static_cast<size_t>(literal.GetLabel()->Position()), buffer_.Size()); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int16_t encoding = B15 | B14; + if (cond != AL) { + DCHECK(IsInt<9>(offset)); + encoding |= B12 | (static_cast<int32_t>(cond) << 8) | ((offset >> 1) & 0xff); + } else { + DCHECK(IsInt<12>(offset)); + encoding |= B13 | ((offset >> 1) & 0x7ff); + } + return encoding; +} + +inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int32_t s = (offset >> 31) & 1; // Sign bit. + int32_t encoding = B31 | B30 | B29 | B28 | B15 | + (s << 26) | // Sign bit goes to bit 26. + ((offset >> 1) & 0x7ff); // imm11 goes to bits 0-10. + if (cond != AL) { + DCHECK(IsInt<21>(offset)); + // Encode cond, move imm6 from bits 12-17 to bits 16-21 and move J1 and J2. + encoding |= (static_cast<int32_t>(cond) << 22) | ((offset & 0x3f000) << (16 - 12)) | + ((offset & (1 << 19)) >> (19 - 13)) | // Extract J1 from bit 19 to bit 13. + ((offset & (1 << 18)) >> (18 - 11)); // Extract J2 from bit 18 to bit 11. + } else { + DCHECK(IsInt<25>(offset)); + int32_t j1 = ((offset >> 23) ^ s ^ 1) & 1; // Calculate J1 from I1 extracted from bit 23. + int32_t j2 = ((offset >> 22)^ s ^ 1) & 1; // Calculate J2 from I2 extracted from bit 22. + // Move imm10 from bits 12-21 to bits 16-25 and add J1 and J2. + encoding |= B12 | ((offset & 0x3ff000) << (16 - 12)) | + (j1 << 13) | (j2 << 11); + } + return encoding; +} + +inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) { + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 1, 0); + DCHECK(IsUint<7>(offset)); + DCHECK(cond == EQ || cond == NE); + return B15 | B13 | B12 | B8 | (cond == NE ? B11 : 0) | static_cast<int32_t>(rn) | + ((offset & 0x3e) << (3 - 1)) | // Move imm5 from bits 1-5 to bits 3-7. 
+ ((offset & 0x40) << (9 - 6)); // Move i from bit 6 to bit 11 +} + +inline int16_t Thumb2Assembler::CmpRnImm8Encoding16(Register rn, int32_t value) { + DCHECK(!IsHighRegister(rn)); + DCHECK(IsUint<8>(value)); + return B13 | B11 | (rn << 8) | value; +} + +inline int16_t Thumb2Assembler::AddRdnRmEncoding16(Register rdn, Register rm) { + // The high bit of rn is moved across 4-bit rm. + return B14 | B10 | (static_cast<int32_t>(rm) << 3) | + (static_cast<int32_t>(rdn) & 7) | ((static_cast<int32_t>(rdn) & 8) << 4); +} + +inline int32_t Thumb2Assembler::MovwEncoding32(Register rd, int32_t value) { + DCHECK(IsUint<16>(value)); + return B31 | B30 | B29 | B28 | B25 | B22 | + (static_cast<int32_t>(rd) << 8) | + ((value & 0xf000) << (16 - 12)) | // Move imm4 from bits 12-15 to bits 16-19. + ((value & 0x0800) << (26 - 11)) | // Move i from bit 11 to bit 26. + ((value & 0x0700) << (12 - 8)) | // Move imm3 from bits 8-10 to bits 12-14. + (value & 0xff); // Keep imm8 in bits 0-7. +} + +inline int32_t Thumb2Assembler::MovtEncoding32(Register rd, int32_t value) { + DCHECK_EQ(value & 0xffff, 0); + int32_t movw_encoding = MovwEncoding32(rd, (value >> 16) & 0xffff); + return movw_encoding | B25 | B23; +} + +inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) { + uint32_t mod_imm = ModifiedImmediate(value); + DCHECK_NE(mod_imm, kInvalidModifiedImmediate); + return B31 | B30 | B29 | B28 | B22 | B19 | B18 | B17 | B16 | + (static_cast<int32_t>(rd) << 8) | static_cast<int32_t>(mod_imm); +} + +inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<10>(offset)); + return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) { + // NOTE: We don't support negative offset, i.e. U=0 (B23). + return LdrRtRnImm12Encoding(rt, PC, offset); +} + +inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | + B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 | + (static_cast<int32_t>(rn) << 16) | (static_cast<int32_t>(rt) << 12) | + (static_cast<int32_t>(rt2) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | + (static_cast<int32_t>(rn) << 16) | + ((static_cast<int32_t>(sd) & 0x01) << (22 - 0)) | // Move D from bit 0 to bit 22. + ((static_cast<int32_t>(sd) & 0x1e) << (12 - 1)) | // Move Vd from bits 1-4 to bits 12-15. + (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | B8 | + (rn << 16) | + ((static_cast<int32_t>(dd) & 0x10) << (22 - 4)) | // Move D from bit 4 to bit 22. + ((static_cast<int32_t>(dd) & 0x0f) << (12 - 0)) | // Move Vd from bits 0-3 to bits 12-15. 
+ (offset >> 2); +} + +inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<7>(offset)); + return B14 | B13 | B11 | + (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) | + (offset << (6 - 2)); // Move imm5 from bits 2-6 to bits 6-10. +} + +int32_t Thumb2Assembler::Fixup::LoadWideOrFpEncoding(Register rbase, int32_t offset) const { + switch (type_) { + case kLoadLiteralWide: + return LdrdEncoding32(rn_, rt2_, rbase, offset); + case kLoadFPLiteralSingle: + return VldrsEncoding32(sd_, rbase, offset); + case kLoadFPLiteralDouble: + return VldrdEncoding32(dd_, rbase, offset); + default: + LOG(FATAL) << "Unexpected type: " << static_cast<int>(type_); + UNREACHABLE(); + } +} + +inline int32_t Thumb2Assembler::LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset) { + DCHECK(IsUint<12>(offset)); + return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset; +} + +void Thumb2Assembler::FinalizeCode() { + ArmAssembler::FinalizeCode(); + BindLiterals(); + uint32_t adjusted_code_size = AdjustFixups(); + EmitFixups(adjusted_code_size); + EmitLiterals(); +} + bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode, @@ -671,17 +974,11 @@ void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) { EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0); } - void Thumb2Assembler::b(Label* label, Condition cond) { EmitBranch(cond, label, false, false); } -void Thumb2Assembler::b(NearLabel* label, Condition cond) { - EmitBranch(cond, label, false, false, /* is_near */ true); -} - - void Thumb2Assembler::bl(Label* label, Condition cond) { CheckCondition(cond); EmitBranch(cond, label, true, false); @@ -1308,80 +1605,359 @@ void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register } } +inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { + switch (size) { + case kBranch16Bit: + return 2u; + case kBranch32Bit: + return 4u; + + case kCbxz16Bit: + return 2u; + case kCbxz32Bit: + return 4u; + case kCbxz48Bit: + return 6u; + + case kLiteral1KiB: + return 2u; + case kLiteral4KiB: + return 4u; + case kLiteral64KiB: + return 8u; + case kLiteral1MiB: + return 10u; + case kLiteralFar: + return 14u; + + case kLongOrFPLiteral1KiB: + return 4u; + case kLongOrFPLiteral256KiB: + return 10u; + case kLongOrFPLiteralFar: + return 14u; + } + LOG(FATAL) << "Unexpected size: " << static_cast<int>(size); + UNREACHABLE(); +} + +inline uint32_t Thumb2Assembler::Fixup::GetOriginalSizeInBytes() const { + return SizeInBytes(original_size_); +} + +inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const { + return SizeInBytes(size_); +} + +inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) { + // The code size must be a multiple of 2. + DCHECK_EQ(current_code_size & 1u, 0u); + // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool. 
+ return current_code_size & 2; +} + +inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) const { + static constexpr int32_t int32_min = std::numeric_limits<int32_t>::min(); + static constexpr int32_t int32_max = std::numeric_limits<int32_t>::max(); + DCHECK_LE(target_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(location_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max)); + int32_t diff = static_cast<int32_t>(target_) - static_cast<int32_t>(location_); + if (target_ > location_) { + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max - diff)); + diff += static_cast<int32_t>(adjustment_); + } else { + DCHECK_LE(int32_min + static_cast<int32_t>(adjustment_), diff); + diff -= static_cast<int32_t>(adjustment_); + } + // The default PC adjustment for Thumb2 is 4 bytes. + DCHECK_GE(diff, int32_min + 4); + diff -= 4; + // Add additional adjustment for instructions preceding the PC usage, padding + // before the literal pool and rounding down the PC for literal loads. + switch (GetSize()) { + case kBranch16Bit: + case kBranch32Bit: + break; + case kCbxz16Bit: + break; + case kCbxz32Bit: + case kCbxz48Bit: + DCHECK_GE(diff, int32_min + 2); + diff -= 2; // Extra CMP Rn, #0, 16-bit. + break; -void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const { - bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink; - bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX; - int32_t offset = target_ - location_; + case kLiteral1KiB: + case kLiteral4KiB: + case kLongOrFPLiteral1KiB: + DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2)); + diff += LiteralPoolPaddingSize(current_code_size); + // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target + // being aligned, current PC alignment can be inferred from diff. + DCHECK_EQ(diff & 1, 0); + diff = diff + (diff & 2); + DCHECK_GE(diff, 0); + break; + case kLiteral1MiB: + case kLiteral64KiB: + case kLongOrFPLiteral256KiB: + DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 4; // One extra 32-bit MOV. + diff += LiteralPoolPaddingSize(current_code_size); + break; + case kLiteralFar: + case kLongOrFPLiteralFar: + DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 8; // Extra MOVW+MOVT; both 32-bit. + diff += LiteralPoolPaddingSize(current_code_size); + break; + } + return diff; +} - if (size_ == k32Bit) { - int32_t encoding = B31 | B30 | B29 | B28 | B15; - if (link) { - // BL or BLX immediate. - encoding |= B14; - if (!x) { - encoding |= B12; - } else { - // Bottom bit of offset must be 0. - CHECK_EQ((offset & 1), 0); +inline size_t Thumb2Assembler::Fixup::IncreaseSize(Size new_size) { + DCHECK_NE(target_, kUnresolved); + Size old_size = size_; + size_ = new_size; + DCHECK_GT(SizeInBytes(new_size), SizeInBytes(old_size)); + size_t adjustment = SizeInBytes(new_size) - SizeInBytes(old_size); + if (target_ > location_) { + adjustment_ += adjustment; + } + return adjustment; +} + +uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) { + uint32_t old_code_size = current_code_size; + switch (GetSize()) { + case kBranch16Bit: + if (IsInt(cond_ != AL ? 
9 : 12, GetOffset(current_code_size))) { + break; } - } else { - if (x) { - LOG(FATAL) << "Invalid use of BX"; - UNREACHABLE(); - } else { - if (cond_ == AL) { - // Can use the T4 encoding allowing a 24 bit offset. - if (!x) { - encoding |= B12; - } - } else { - // Must be T3 encoding with a 20 bit offset. - encoding |= cond_ << 22; - } + current_code_size += IncreaseSize(kBranch32Bit); + FALLTHROUGH_INTENDED; + case kBranch32Bit: + // We don't support conditional branches beyond +-1MiB + // or unconditional branches beyond +-16MiB. + break; + + case kCbxz16Bit: + if (IsUint<7>(GetOffset(current_code_size))) { + break; } - } - encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding); - buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16)); - buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff)); - } else { - if (IsCompareAndBranch()) { - offset -= 4; - uint16_t i = (offset >> 6) & 1; - uint16_t imm5 = (offset >> 1) & 31U /* 0b11111 */; - int16_t encoding = B15 | B13 | B12 | - (type_ == kCompareAndBranchNonZero ? B11 : 0) | - static_cast<uint32_t>(rn_) | - B8 | - i << 9 | - imm5 << 3; + current_code_size += IncreaseSize(kCbxz32Bit); + FALLTHROUGH_INTENDED; + case kCbxz32Bit: + if (IsInt<9>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kCbxz48Bit); + FALLTHROUGH_INTENDED; + case kCbxz48Bit: + // We don't support conditional branches beyond +-1MiB. + break; + + case kLiteral1KiB: + DCHECK(!IsHighRegister(rn_)); + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral4KiB); + FALLTHROUGH_INTENDED; + case kLiteral4KiB: + if (IsUint<12>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral64KiB); + FALLTHROUGH_INTENDED; + case kLiteral64KiB: + // Can't handle high register which we can encounter by fall-through from kLiteral4KiB. + if (!IsHighRegister(rn_) && IsUint<16>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral1MiB); + FALLTHROUGH_INTENDED; + case kLiteral1MiB: + if (IsUint<20>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralFar); + FALLTHROUGH_INTENDED; + case kLiteralFar: + // This encoding can reach any target. + break; + + case kLongOrFPLiteral1KiB: + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteral256KiB); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteral256KiB: + if (IsUint<18>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteralFar); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteralFar: + // This encoding can reach any target. + break; + } + return current_code_size - old_code_size; +} + +void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { + switch (GetSize()) { + case kBranch16Bit: { + DCHECK(type_ == kUnconditional || type_ == kConditional); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int16_t encoding = BEncoding16(GetOffset(code_size), cond_); buffer->Store<int16_t>(location_, encoding); - } else { - offset -= 4; // Account for PC offset. - int16_t encoding; - // 16 bit. 
- if (cond_ == AL) { - encoding = B15 | B14 | B13 | - ((offset >> 1) & 0x7ff); - } else { - encoding = B15 | B14 | B12 | - cond_ << 8 | ((offset >> 1) & 0xff); + break; + } + case kBranch32Bit: { + DCHECK(type_ == kConditional || type_ == kUnconditional || + type_ == kUnconditionalLink || type_ == kUnconditionalLinkX); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int32_t encoding = BEncoding32(GetOffset(code_size), cond_); + if (type_ == kUnconditionalLink) { + DCHECK_NE(encoding & B12, 0); + encoding |= B14; + } else if (type_ == kUnconditionalLinkX) { + DCHECK_NE(encoding & B12, 0); + encoding ^= B14 | B12; } + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + + case kCbxz16Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, encoding); + break; + } + case kCbxz32Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2, b_encoding); + break; + } + case kCbxz48Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); + buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kLiteral1KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); buffer->Store<int16_t>(location_, encoding); + break; + } + case kLiteral4KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. 
+ int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLiteral64KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int16_t ldr_encoding = LdrRtRnImm5Encoding16(rn_, rn_, 0); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding); + break; + } + case kLiteral1MiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLiteralFar: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); + buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + + case kLongOrFPLiteral1KiB: { + int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLongOrFPLiteral256KiB: { + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_. + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLongOrFPLiteralFar: { + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(IP, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
+ buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; } } } - uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) { CHECK(IsLowRegister(rn)); uint32_t location = buffer_.Size(); // This is always unresolved as it must be a forward branch. Emit16(prev); // Previous link. - return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero, - location, rn); + return AddFixup(Fixup::CompareAndBranch(location, rn, n ? NE : EQ)); } @@ -1619,47 +2195,53 @@ void Thumb2Assembler::EmitMultiMemOp(Condition cond, } } - -void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) { +void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) { + bool use32bit = IsForced32Bit() || !CanRelocateBranches(); uint32_t pc = buffer_.Size(); - Branch::Type branch_type; + Fixup::Type branch_type; if (cond == AL) { if (link) { + use32bit = true; if (x) { - branch_type = Branch::kUnconditionalLinkX; // BLX. + branch_type = Fixup::kUnconditionalLinkX; // BLX. } else { - branch_type = Branch::kUnconditionalLink; // BX. + branch_type = Fixup::kUnconditionalLink; // BX. } } else { - branch_type = Branch::kUnconditional; // B. + branch_type = Fixup::kUnconditional; // B. } } else { - branch_type = Branch::kConditional; // B<cond>. + branch_type = Fixup::kConditional; // B<cond>. } + Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit; + FixupId branch_id = AddFixup(Fixup::Branch(pc, branch_type, size, cond)); + if (label->IsBound()) { - Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond); // Resolved branch. - - // The branch is to a bound label which means that it's a backwards branch. We know the - // current size of it so we can emit the appropriate space. Note that if it's a 16 bit - // branch the size may change if it so happens that other branches change size that change - // the distance to the target and that distance puts this branch over the limit for 16 bits. - if (size == Branch::k16Bit) { - Emit16(0); // Space for a 16 bit branch. - } else { - Emit32(0); // Space for a 32 bit branch. + // The branch is to a bound label which means that it's a backwards branch. + // Record this branch as a dependency of all Fixups between the label and the branch. + GetFixup(branch_id)->Resolve(label->Position()); + for (FixupId fixup_id = branch_id; fixup_id != 0u; ) { + --fixup_id; + Fixup* fixup = GetFixup(fixup_id); + DCHECK_GE(label->Position(), 0); + if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) { + break; + } + fixup->AddDependent(branch_id); } + Emit16(0); } else { - // Branch is to an unbound label. Emit space for it. - uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near); // Unresolved branch. - if (force_32bit_ || (!CanRelocateBranches() && !is_near)) { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. - Emit16(0); // another 16 bits. - } else { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. 
- } - label->LinkTo(branch_id); // Link to the branch ID. + // Branch target is an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); } + + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); } @@ -2274,82 +2856,8 @@ void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) { } -// A branch has changed size. Make a hole for it. -void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) { - // Move the contents of the buffer using: Move(newposition, oldposition) - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Move(location + delta, location); -} - - void Thumb2Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - uint32_t bound_pc = buffer_.Size(); - std::vector<Branch*> changed_branches; - - while (label->IsLinked()) { - uint16_t position = label->Position(); // Branch id for linked branch. - Branch* branch = GetBranch(position); // Get the branch at this id. - bool changed = branch->Resolve(bound_pc); // Branch can be resolved now. - uint32_t branch_location = branch->GetLocation(); - uint16_t next = buffer_.Load<uint16_t>(branch_location); // Get next in chain. - if (changed) { - DCHECK(CanRelocateBranches()); - MakeHoleForBranch(branch->GetLocation(), 2); - if (branch->IsCompareAndBranch()) { - // A cbz/cbnz instruction has changed size. There is no valid encoding for - // a 32 bit cbz/cbnz so we need to change this to an instruction pair: - // cmp rn, #0 - // b<eq|ne> target - bool n = branch->GetType() == Branch::kCompareAndBranchNonZero; - Condition cond = n ? NE : EQ; - branch->Move(2); // Move the branch forward by 2 bytes. - branch->ResetTypeAndCondition(Branch::kConditional, cond); - branch->ResetSize(Branch::k16Bit); - - // Now add a compare instruction in the place the branch was. - buffer_.Store<int16_t>(branch_location, - B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8); - - // Since have moved made a hole in the code we need to reload the - // current pc. - bound_pc = buffer_.Size(); - - // Now resolve the newly added branch. - changed = branch->Resolve(bound_pc); - if (changed) { - MakeHoleForBranch(branch->GetLocation(), 2); - changed_branches.push_back(branch); - } - } else { - changed_branches.push_back(branch); - } - } - label->position_ = next; // Move to next. - } - label->BindTo(bound_pc); - - // Now relocate any changed branches. Do this until there are no more changes. - std::vector<Branch*> branches_to_process = changed_branches; - while (branches_to_process.size() != 0) { - changed_branches.clear(); - for (auto& changed_branch : branches_to_process) { - for (auto& branch : branches_) { - bool changed = branch->Relocate(changed_branch->GetLocation(), 2); - if (changed) { - changed_branches.push_back(branch); - } - } - branches_to_process = changed_branches; - } - } -} - - -void Thumb2Assembler::EmitBranches() { - for (auto& branch : branches_) { - branch->Emit(&buffer_); - } + BindLabel(label, buffer_.Size()); } @@ -2487,6 +2995,85 @@ int Thumb2Assembler::DecodeBranchOffset(int32_t instr) { return imm32; } +uint32_t Thumb2Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the fixups from the beginning + // up to the old_position. 
Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of fixups. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0u; + last_old_position_ = 0u; + last_fixup_id_ = 0u; + } + while (last_fixup_id_ != fixups_.size()) { + Fixup* fixup = GetFixup(last_fixup_id_); + if (fixup->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + if (fixup->GetSize() != fixup->GetOriginalSize()) { + last_position_adjustment_ += fixup->GetSizeInBytes() - fixup->GetOriginalSizeInBytes(); + } + ++last_fixup_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +Literal* Thumb2Assembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + bool use32bit = IsForced32Bit() || IsHighRegister(rt); + uint32_t location = buffer_.Size(); + Fixup::Size size = use32bit ? Fixup::kLiteral4KiB : Fixup::kLiteral1KiB; + FixupId fixup_id = AddFixup(Fixup::LoadNarrowLiteral(location, rt, size)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Register rt2, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = + AddFixup(Fixup::LoadWideLiteral(location, rt, rt2, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(SRegister sd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadSingleLiteral(location, sd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(DRegister dd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadDoubleLiteral(location, dd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); @@ -2763,16 +3350,6 @@ void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) { } -void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) { - if (IsLowRegister(r)) { - cbz(r, label); - } else { - cmp(r, ShifterOperand(0)); - b(label, EQ); - } -} - 
- void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { if (CanRelocateBranches() && IsLowRegister(r)) { cbnz(r, label); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 2382b74c30..5e6969b4c2 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ +#include <deque> #include <vector> #include "base/logging.h" @@ -34,13 +35,15 @@ class Thumb2Assembler FINAL : public ArmAssembler { : can_relocate_branches_(can_relocate_branches), force_32bit_(false), it_cond_index_(kNoItCondition), - next_condition_(AL) { + next_condition_(AL), + fixups_(), + literals_(), + last_position_adjustment_(0u), + last_old_position_(0u), + last_fixup_id_(0u) { } virtual ~Thumb2Assembler() { - for (auto& branch : branches_) { - delete branch; - } } bool IsThumb() const OVERRIDE { @@ -55,10 +58,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { return can_relocate_branches_; } - void FinalizeInstructions(const MemoryRegion& region) OVERRIDE { - EmitBranches(); - Assembler::FinalizeInstructions(region); - } + void FinalizeCode() OVERRIDE; // Data-processing instructions. void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; @@ -238,7 +238,6 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Branch instructions. void b(Label* label, Condition cond = AL); - void b(NearLabel* label, Condition cond = AL); void bl(Label* label, Condition cond = AL); void blx(Label* label); void blx(Register rm, Condition cond = AL) OVERRIDE; @@ -273,13 +272,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; - void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE; void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + using ArmAssembler::NewLiteral; // Make the helper template visible. + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, @@ -340,6 +349,244 @@ class Thumb2Assembler FINAL : public ArmAssembler { } private: + typedef uint16_t FixupId; + + // Fixup: branches and literal pool references. + // + // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This + // depends on both the type of branch and the offset to which it is branching. The 16-bit + // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare + // instruction and a 16- or 32-bit branch instruction. 
A load from the literal pool can also be a + // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence + // of instructions to make up for the limited range of load literal instructions (up to + // 4KiB for the 32-bit variant). When generating code for these insns we don't know the + // size beforehand, so we assume it is the smallest available size and determine the final + // code offsets and sizes and emit code in FinalizeCode(). + // + // To handle this, we keep a record of every branch and literal pool load in the program. + // The actual instruction encoding for these is delayed until we know the final size of + // every instruction. When we bind a label to a branch we don't know the final location yet + // as some preceding instructions may need to be expanded, so we record a non-final offset. + // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of + // range. With each expansion, we need to update dependent Fixups, i.e. instructions with + // a target on the other side of the expanded insn, as their offsets change and this may + // trigger further expansion. + // + // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the + // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing + // to it, using the fixup ids as links. The first link is stored in the label's position + // (the label is linked but not bound), the following links are stored in the code buffer, + // in the placeholder where we will eventually emit the actual code. + + class Fixup { + public: + // Branch type. + enum Type : uint8_t { + kConditional, // B<cond>. + kUnconditional, // B. + kUnconditionalLink, // BL. + kUnconditionalLinkX, // BLX. + kCompareAndBranchXZero, // cbz/cbnz. + kLoadLiteralNarrow, // Load narrow integer literal. + kLoadLiteralWide, // Load wide integer literal. + kLoadFPLiteralSingle, // Load FP literal single. + kLoadFPLiteralDouble, // Load FP literal double. + }; + + // Calculated size of branch instruction based on type and offset. + enum Size : uint8_t { + // Branch variants. + kBranch16Bit, + kBranch32Bit, + // NOTE: We don't support branches which would require multiple instructions, i.e. + // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB. + + // CBZ/CBNZ variants. + kCbxz16Bit, // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset. + kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. + kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + + // Load integer literal variants. + // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. + kLiteral1KiB, + // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes. + kLiteral4KiB, + // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes. + kLiteral64KiB, + // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes. + kLiteral1MiB, + // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit. + // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes. + kLiteralFar, + + // Load long or FP literal variants. + // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. + kLongOrFPLiteral1KiB, + // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes. + kLongOrFPLiteral256KiB, + // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
+ kLongOrFPLiteralFar, + }; + + // Unresolved branch possibly with a condition. + static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit, + Condition cond = AL) { + DCHECK(type == kConditional || type == kUnconditional || + type == kUnconditionalLink || type == kUnconditionalLinkX); + DCHECK(size == kBranch16Bit || size == kBranch32Bit); + DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional)); + return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister, + cond, type, size, location); + } + + // Unresolved compare-and-branch instruction with a register and condition (EQ or NE). + static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) { + DCHECK(cond == EQ || cond == NE); + return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister, + cond, kCompareAndBranchXZero, kCbxz16Bit, location); + } + + // Load narrow literal. + static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) { + DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || + size == kLiteral1MiB || size == kLiteralFar); + DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadLiteralNarrow, size, location); + } + + // Load wide literal. + static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); + return Fixup(rt, rt2, kNoSRegister, kNoDRegister, + AL, kLoadLiteralWide, size, location); + } + + // Load FP single literal. + static Fixup LoadSingleLiteral(uint32_t location, SRegister sd, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister, + AL, kLoadFPLiteralSingle, size, location); + } + + // Load FP double literal. + static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd, + AL, kLoadFPLiteralDouble, size, location); + } + + Type GetType() const { + return type_; + } + + Size GetOriginalSize() const { + return original_size_; + } + + Size GetSize() const { + return size_; + } + + uint32_t GetOriginalSizeInBytes() const; + + uint32_t GetSizeInBytes() const; + + uint32_t GetLocation() const { + return location_; + } + + uint32_t GetAdjustment() const { + return adjustment_; + } + + const std::vector<FixupId>& Dependents() const { + return dependents_; + } + + void AddDependent(FixupId dependent_id) { + dependents_.push_back(dependent_id); + } + + // Resolve a branch when the target is known. + void Resolve(uint32_t target) { + DCHECK_EQ(target_, kUnresolved); + DCHECK_NE(target, kUnresolved); + target_ = target; + } + + // Check if the current size is OK for current location_, target_ and adjustment_. + // If not, increase the size. Return the size increase, 0 if unchanged. + // If the target if after this Fixup, also add the difference to adjustment_, + // so that we don't need to consider forward Fixups as their own dependencies. 
+ uint32_t AdjustSizeIfNeeded(uint32_t current_code_size); + + // Increase adjustments. This is called for dependents of a Fixup when its size changes. + void IncreaseAdjustment(uint32_t increase) { + adjustment_ += increase; + } + + // Finalize the branch with an adjustment to the location. Both location and target are updated. + void Finalize(uint32_t location_adjustment) { + DCHECK_NE(target_, kUnresolved); + location_ += location_adjustment; + target_ += location_adjustment; + } + + // Emit the branch instruction into the assembler buffer. This does the + // encoding into the thumb instruction. + void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + + private: + Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, + Condition cond, Type type, Size size, uint32_t location) + : rn_(rn), + rt2_(rt2), + sd_(sd), + dd_(dd), + cond_(cond), + type_(type), + original_size_(size), size_(size), + location_(location), + target_(kUnresolved), + adjustment_(0u), + dependents_() { + } + static size_t SizeInBytes(Size size); + + // The size of padding added before the literal pool. + static size_t LiteralPoolPaddingSize(uint32_t current_code_size); + + // Returns the offset from the PC-using insn to the target. + int32_t GetOffset(uint32_t current_code_size) const; + + size_t IncreaseSize(Size new_size); + + int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. + + const Register rn_; // Rn for cbnz/cbz, Rt for literal loads. + Register rt2_; // For kLoadLiteralWide. + SRegister sd_; // For kLoadFPLiteralSingle. + DRegister dd_; // For kLoadFPLiteralDouble. + const Condition cond_; + const Type type_; + Size original_size_; + Size size_; + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. + std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes. + }; + // Emit a single 32 or 16 bit data processing instruction. void EmitDataProcessing(Condition cond, Opcode opcode, @@ -432,7 +679,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); - void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false); + void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); int32_t EncodeTstOffset(int offset, int32_t inst); @@ -475,275 +722,53 @@ class Thumb2Assembler FINAL : public ArmAssembler { CheckCondition(cond); } - // Branches. - // - // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This - // depends on both the type of branch and the offset to which it is branching. When - // generating code for branches we don't know the size before hand (if the branch is - // going forward, because we haven't seen the target address yet), so we need to assume - // that it is going to be one of 16 or 32 bits. When we know the target (the label is 'bound') - // we can determine the actual size of the branch. However, if we had guessed wrong before - // we knew the target there will be no room in the instruction sequence for the new - // instruction (assume that we never decrease the size of a branch). 
- // - // To handle this, we keep a record of every branch in the program. The actual instruction - // encoding for these is delayed until we know the final size of every branch. When we - // bind a label to a branch (we then know the target address) we determine if the branch - // has changed size. If it has we need to move all the instructions in the buffer after - // the branch point forward by the change in size of the branch. This will create a gap - // in the code big enough for the new branch encoding. However, since we have moved - // a chunk of code we need to relocate the branches in that code to their new address. - // - // Creating a hole in the code for the new branch encoding might cause another branch that was - // 16 bits to become 32 bits, so we need to find this in another pass. - // - // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset - // range. We do this by converting it to two instructions: - // cmp Rn, #0 - // b<cond> target - // But we also need to handle the case where the conditional branch is out of range and - // becomes a 32 bit conditional branch. - // - // All branches have a 'branch id' which is a 16 bit unsigned number used to identify - // the branch. Unresolved labels use the branch id to link to the next unresolved branch. - - class Branch { - public: - // Branch type. - enum Type { - kUnconditional, // B. - kConditional, // B<cond>. - kCompareAndBranchZero, // cbz. - kCompareAndBranchNonZero, // cbnz. - kUnconditionalLink, // BL. - kUnconditionalLinkX, // BLX. - kUnconditionalX // BX. - }; - - // Calculated size of branch instruction based on type and offset. - enum Size { - k16Bit, - k32Bit - }; - - // Unresolved branch possibly with a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), - cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Unresolved compare-and-branch instruction with a register. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), cond_(AL), rn_(rn) { - CHECK(IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Resolved branch (can't be compare-and-branch) with a target and possibly a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target, - Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(target), cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - // Resolved branch. - size_ = CalculateSize(); - } - - bool IsCompareAndBranch() const { - return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero; - } - - // Resolve a branch when the target is known. If this causes the - // size of the branch to change return true. Otherwise return false. - bool Resolve(uint32_t target) { - uint32_t old_target = target_; - target_ = target; - if (assembler_->CanRelocateBranches()) { - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } else { - if (kIsDebugBuild) { - if (old_target == kUnresolved) { - // Check that the size has not increased. - DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit)); - } else { - DCHECK(CalculateSize() == size_); - } - } - return false; - } - } - - // Move a cbz/cbnz branch. This is always forward. 
- void Move(int32_t delta) { - CHECK(IsCompareAndBranch()); - CHECK_GT(delta, 0); - location_ += delta; - target_ += delta; - } - - // Relocate a branch by a given delta. This changed the location and - // target if they need to be changed. It also recalculates the - // size of the branch instruction. It returns true if the branch - // has changed size. - bool Relocate(uint32_t oldlocation, int32_t delta) { - DCHECK(assembler_->CanRelocateBranches()); - if (location_ > oldlocation) { - location_ += delta; - } - if (target_ != kUnresolved) { - if (target_ > oldlocation) { - target_ += delta; - } - } else { - return false; // Don't know the size yet. - } - - // Calculate the new size. - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } - - Size GetSize() const { - return size_; - } - - Type GetType() const { - return type_; - } - - uint32_t GetLocation() const { - return location_; - } - - // Emit the branch instruction into the assembler buffer. This does the - // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer) const; - - // Reset the type and condition to those given. This used for - // cbz/cbnz instructions when they are converted to cmp/b<cond> - void ResetTypeAndCondition(Type type, Condition cond) { - CHECK(IsCompareAndBranch()); - CHECK(cond == EQ || cond == NE); - type_ = type; - cond_ = cond; - } - - Register GetRegister() const { - return rn_; - } - - void ResetSize(Size size) { - size_ = size; - } - - private: - // Calculate the size of the branch instruction based on its type and offset. - Size CalculateSize() const { - if (target_ == kUnresolved) { - if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) { - return k32Bit; - } - if (IsCompareAndBranch()) { - // Compare and branch instructions can only be encoded on 16 bits. - return k16Bit; - } - return assembler_->CanRelocateBranches() ? k16Bit : k32Bit; - } - // When the target is resolved, we know the best encoding for it. - int32_t delta = target_ - location_ - 4; - if (delta < 0) { - delta = -delta; - } - switch (type_) { - case kUnconditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 11)) { - return k32Bit; - } else { - return k16Bit; - } - case kConditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 8)) { - return k32Bit; - } else { - return k16Bit; - } - case kCompareAndBranchZero: - case kCompareAndBranchNonZero: - if (delta >= (1 << 7)) { - return k32Bit; // Will cause this branch to become invalid. - } - return k16Bit; - - case kUnconditionalX: - case kUnconditionalLinkX: - return k16Bit; - case kUnconditionalLink: - return k32Bit; - } - LOG(FATAL) << "Cannot reach"; - return k16Bit; - } - - static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. - const Thumb2Assembler* assembler_; - Type type_; - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. - Size size_; - Condition cond_; - const Register rn_; - }; - - std::vector<Branch*> branches_; - - // Add a resolved branch and return its size. - Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target, - Condition cond = AL) { - branches_.push_back(new Branch(this, type, location, target, cond)); - return branches_[branches_.size()-1]->GetSize(); - } - - // Add a compare and branch (with a register) and return its id. 
- uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) { - branches_.push_back(new Branch(this, type, location, rn)); - return branches_.size() - 1; + FixupId AddFixup(Fixup fixup) { + FixupId fixup_id = static_cast<FixupId>(fixups_.size()); + fixups_.push_back(fixup); + // For iterating using FixupId, we need the next id to be representable. + DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size()); + return fixup_id; } - // Add an unresolved branch and return its id. - uint16_t AddBranch(Branch::Type type, - uint32_t location, - Condition cond = AL, - bool is_near = false) { - Branch* branch = new Branch(this, type, location, cond); - if (is_near) { - branch->ResetSize(Branch::k16Bit); - } - branches_.push_back(branch); - return branches_.size() - 1; - } - - Branch* GetBranch(uint16_t branchid) { - if (branchid >= branches_.size()) { - return nullptr; - } - return branches_[branchid]; + Fixup* GetFixup(FixupId fixup_id) { + DCHECK_LT(fixup_id, fixups_.size()); + return &fixups_[fixup_id]; } - void EmitBranches(); - void MakeHoleForBranch(uint32_t location, uint32_t size); + void BindLabel(Label* label, uint32_t bound_pc); + void BindLiterals(); + void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate); + uint32_t AdjustFixups(); + void EmitFixups(uint32_t adjusted_code_size); + void EmitLiterals(); + + static int16_t BEncoding16(int32_t offset, Condition cond); + static int32_t BEncoding32(int32_t offset, Condition cond); + static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond); + static int16_t CmpRnImm8Encoding16(Register rn, int32_t value); + static int16_t AddRdnRmEncoding16(Register rdn, Register rm); + static int32_t MovwEncoding32(Register rd, int32_t value); + static int32_t MovtEncoding32(Register rd, int32_t value); + static int32_t MovModImmEncoding32(Register rd, int32_t value); + static int16_t LdrLitEncoding16(Register rt, int32_t offset); + static int32_t LdrLitEncoding32(Register rt, int32_t offset); + static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset); + static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset); + static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset); + static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset); + static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); + + std::vector<Fixup> fixups_; + + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + std::deque<Literal> literals_; + + // Data for AdjustedPosition(), see the description there. 
+ uint32_t last_position_adjustment_; + uint32_t last_old_position_; + FixupId last_fixup_id_; }; } // namespace arm diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 733441b889..68b7931a0c 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -78,13 +78,20 @@ class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, return imm_value; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + private: std::vector<arm::Register*> registers_; static constexpr const char* kThumb2AssemblyHeader = ".syntax unified\n.thumb\n"; }; - TEST_F(AssemblerThumb2Test, Toolchain) { EXPECT_TRUE(CheckTools()); } @@ -370,4 +377,577 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { DriverStr(expected, "StoreWordPairToNonThumbOffset"); } +TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 0u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 0u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzBeyondMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzBeyondMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 4u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 4u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 128; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ 
ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 129; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.w 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.w 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 
2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 4u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.n r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 512; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2046; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r1, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2047; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #4096\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. 
+ "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #0xfffc\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 1u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #(0x100000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt r1, #(0x100000 >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralFar) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #((0x100000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt r1, #((0x100000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 510; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldrd r1, r3, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) { + // The literal size must match but the type doesn't, so use an int32_t rather than float. 
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::S3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralSingleMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x40000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt ip, #(0x40000 >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index cc78002ab0..eb8de0620b 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -31,7 +31,7 @@ namespace arm64 { #define ___ vixl_masm_-> #endif -void Arm64Assembler::EmitSlowPaths() { +void Arm64Assembler::FinalizeCode() { if (!exception_blocks_.empty()) { for (size_t i = 0; i < exception_blocks_.size(); i++) { EmitExceptionPoll(exception_blocks_.at(i)); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index fa9faed66b..b53c11bc24 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -73,8 +73,8 @@ class Arm64Assembler FINAL : public Assembler { delete vixl_masm_; } - // Emit slow paths queued during assembly. - void EmitSlowPaths(); + // Finalize the code. + void FinalizeCode() OVERRIDE; // Size of generated code. size_t CodeSize() const; diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b016e74aba..6d8a98931f 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -80,10 +80,11 @@ void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { } -void AssemblerBuffer::ExtendCapacity() { +void AssemblerBuffer::ExtendCapacity(size_t min_capacity) { size_t old_size = Size(); size_t old_capacity = Capacity(); size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB); + new_capacity = std::max(new_capacity, min_capacity); // Allocate the new data area and copy contents of the old one to it. uint8_t* new_contents = NewContents(new_capacity); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 672e1503be..0381af3956 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -199,13 +199,18 @@ class AssemblerBuffer { *reinterpret_cast<T*>(contents_ + position) = value; } - void Move(size_t newposition, size_t oldposition) { - CHECK(HasEnsuredCapacity()); - // Move the contents of the buffer from oldposition to - // newposition by nbytes. - size_t nbytes = Size() - oldposition; - memmove(contents_ + newposition, contents_ + oldposition, nbytes); - cursor_ += newposition - oldposition; + void Resize(size_t new_size) { + if (new_size > Capacity()) { + ExtendCapacity(new_size); + } + cursor_ = contents_ + new_size; + } + + void Move(size_t newposition, size_t oldposition, size_t size) { + // Move a chunk of the buffer from oldposition to newposition. + DCHECK_LE(oldposition + size, Size()); + DCHECK_LE(newposition + size, Size()); + memmove(contents_ + newposition, contents_ + oldposition, size); } // Emit a fixup at the current location. 
@@ -350,7 +355,7 @@ class AssemblerBuffer { return data + capacity - kMinimumGap; } - void ExtendCapacity(); + void ExtendCapacity(size_t min_capacity = 0u); friend class AssemblerFixup; }; @@ -376,8 +381,8 @@ class Assembler { public: static Assembler* Create(InstructionSet instruction_set); - // Emit slow paths queued during assembly - virtual void EmitSlowPaths() { buffer_.EmitSlowPaths(this); } + // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. + virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index a339633efe..017402dbd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -544,6 +544,7 @@ class AssemblerTest : public testing::Test { } void DriverWrapper(std::string assembly_text, std::string test_name) { + assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 1a2c9a9000..20f61f942b 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -65,20 +65,33 @@ int CompareIgnoringSpace(const char* s1, const char* s2) { return *s1 - *s2; } -void dump(std::vector<uint8_t>& code, const char* testname) { - // This will only work on the host. There is no as, objcopy or objdump on the - // device. +void InitResults() { + if (test_results.empty()) { + setup_results(); + } +} + +std::string GetToolsDir() { #ifndef HAVE_ANDROID_OS - static bool results_ok = false; + // This will only work on the host. There is no as, objcopy or objdump on the device. static std::string toolsdir; - if (!results_ok) { + if (toolsdir.empty()) { setup_results(); toolsdir = CommonRuntimeTest::GetAndroidTargetToolsDir(kThumb2); SetAndroidData(); - results_ok = true; } + return toolsdir; +#else + return std::string(); +#endif +} + +void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) { +#ifndef HAVE_ANDROID_OS + static std::string toolsdir = GetToolsDir(); + ScratchFile file; const char* filename = file.GetFilename().c_str(); @@ -130,9 +143,6 @@ void dump(std::vector<uint8_t>& code, const char* testname) { FILE *fp = popen(cmd, "r"); ASSERT_TRUE(fp != nullptr); - std::map<std::string, const char**>::iterator results = test_results.find(testname); - ASSERT_NE(results, test_results.end()); - uint32_t lineindex = 0; while (!feof(fp)) { @@ -141,14 +151,14 @@ void dump(std::vector<uint8_t>& code, const char* testname) { if (s == nullptr) { break; } - if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) { + if (CompareIgnoringSpace(results[lineindex], testline) != 0) { LOG(FATAL) << "Output is not as expected at line: " << lineindex - << results->second[lineindex] << "/" << testline; + << results[lineindex] << "/" << testline; } ++lineindex; } // Check that we are at the end. 
- ASSERT_TRUE(results->second[lineindex] == nullptr); + ASSERT_TRUE(results[lineindex] == nullptr); fclose(fp); } @@ -163,8 +173,31 @@ void dump(std::vector<uint8_t>& code, const char* testname) { #define __ assembler-> +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname, + const char* const* results) { + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::vector<uint8_t> managed_code(cs); + MemoryRegion code(&managed_code[0], managed_code.size()); + __ FinalizeInstructions(code); + + DumpAndCheck(managed_code, testname, results); +} + +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname) { + InitResults(); + std::map<std::string, const char* const*>::iterator results = test_results.find(testname); + ASSERT_NE(results, test_results.end()); + + EmitAndCheck(assembler, testname, results->second); +} + +#undef __ + +#define __ assembler. + TEST(Thumb2AssemblerTest, SimpleMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); @@ -172,46 +205,31 @@ TEST(Thumb2AssemblerTest, SimpleMov) { __ mov(R0, ShifterOperand(1)); __ mov(R8, ShifterOperand(9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov"); } TEST(Thumb2AssemblerTest, SimpleMov32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov32"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov32"); } TEST(Thumb2AssemblerTest, SimpleMovAdd) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ add(R0, R1, ShifterOperand(R2)); __ add(R0, R1, ShifterOperand()); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMovAdd"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMovAdd"); } TEST(Thumb2AssemblerTest, DataProcessingRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mvn(R0, ShifterOperand(R1)); @@ -249,16 +267,11 @@ TEST(Thumb2AssemblerTest, DataProcessingRegister) { // 32 bit variants. 
__ add(R12, R1, ShifterOperand(R0)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingRegister"); } TEST(Thumb2AssemblerTest, DataProcessingImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x55)); __ mvn(R0, ShifterOperand(0x55)); @@ -283,16 +296,11 @@ TEST(Thumb2AssemblerTest, DataProcessingImmediate) { __ movs(R0, ShifterOperand(0x55)); __ mvns(R0, ShifterOperand(0x55)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mvn(R0, ShifterOperand(0x550055)); @@ -311,17 +319,12 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { __ cmp(R0, ShifterOperand(0x550055)); __ cmn(R0, ShifterOperand(0x550055)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mov(R0, ShifterOperand(0x55005500)); @@ -331,16 +334,11 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { __ mov(R0, ShifterOperand(0x350)); // rotated to 2nd last position __ mov(R0, ShifterOperand(0x1a8)); // rotated to last position - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediates"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediates"); } TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R3, ShifterOperand(R4, LSL, 4)); __ mov(R3, ShifterOperand(R4, LSR, 5)); @@ -355,17 +353,12 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { __ mov(R8, ShifterOperand(R4, ROR, 7)); __ mov(R8, ShifterOperand(R4, RRX)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingShiftedRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } TEST(Thumb2AssemblerTest, BasicLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24)); __ ldrb(R3, Address(R4, 24)); @@ -382,17 +375,12 @@ TEST(Thumb2AssemblerTest, 
BasicLoad) { __ ldrsb(R8, Address(R4, 24)); __ ldrsh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicLoad"); - delete assembler; + EmitAndCheck(&assembler, "BasicLoad"); } TEST(Thumb2AssemblerTest, BasicStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24)); __ strb(R3, Address(R4, 24)); @@ -405,16 +393,11 @@ TEST(Thumb2AssemblerTest, BasicStore) { __ strb(R8, Address(R4, 24)); __ strh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicStore"); - delete assembler; + EmitAndCheck(&assembler, "BasicStore"); } TEST(Thumb2AssemblerTest, ComplexLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24, Address::Mode::Offset)); __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -451,17 +434,12 @@ TEST(Thumb2AssemblerTest, ComplexLoad) { __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoad"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoad"); } TEST(Thumb2AssemblerTest, ComplexStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24, Address::Mode::Offset)); __ str(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -484,16 +462,11 @@ TEST(Thumb2AssemblerTest, ComplexStore) { __ strh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexStore"); - delete assembler; + EmitAndCheck(&assembler, "ComplexStore"); } TEST(Thumb2AssemblerTest, NegativeLoadStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, -24, Address::Mode::Offset)); __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex)); @@ -551,30 +524,20 @@ TEST(Thumb2AssemblerTest, NegativeLoadStore) { __ strh(R3, Address(R4, -24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStore"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStore"); } TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ ldrd(R2, Address(R0, 24, Address::Mode::Offset)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion 
code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "SimpleLoadStoreDual"); } TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ strd(R2, Address(R0, 24, Address::Mode::PreIndex)); @@ -590,16 +553,11 @@ TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { __ ldrd(R2, Address(R0, 24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoadStoreDual"); } TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, -24, Address::Mode::Offset)); __ strd(R2, Address(R0, -24, Address::Mode::PreIndex)); @@ -615,16 +573,11 @@ TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { __ ldrd(R2, Address(R0, -24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStoreDual"); } TEST(Thumb2AssemblerTest, SimpleBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ mov(R0, ShifterOperand(2)); @@ -658,17 +611,12 @@ TEST(Thumb2AssemblerTest, SimpleBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleBranch"); - delete assembler; + EmitAndCheck(&assembler, "SimpleBranch"); } TEST(Thumb2AssemblerTest, LongBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); // 32 bit branches. Label l1; __ mov(R0, ShifterOperand(2)); @@ -703,16 +651,11 @@ TEST(Thumb2AssemblerTest, LongBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LongBranch"); - delete assembler; + EmitAndCheck(&assembler, "LongBranch"); } TEST(Thumb2AssemblerTest, LoadMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. 
__ ldm(DB_W, R4, (1 << R0 | 1 << R3)); @@ -724,16 +667,11 @@ TEST(Thumb2AssemblerTest, LoadMultiple) { // Single reg is converted to ldr __ ldm(DB_W, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadMultiple"); - delete assembler; + EmitAndCheck(&assembler, "LoadMultiple"); } TEST(Thumb2AssemblerTest, StoreMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ stm(IA_W, R4, (1 << R0 | 1 << R3)); @@ -746,16 +684,11 @@ TEST(Thumb2AssemblerTest, StoreMultiple) { __ stm(IA_W, R4, (1 << R5)); __ stm(IA, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreMultiple"); - delete assembler; + EmitAndCheck(&assembler, "StoreMultiple"); } TEST(Thumb2AssemblerTest, MovWMovT) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ movw(R4, 0); // 16 bit. __ movw(R4, 0x34); // 16 bit. @@ -768,16 +701,11 @@ TEST(Thumb2AssemblerTest, MovWMovT) { __ movt(R0, 0x1234); __ movt(R1, 0xffff); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MovWMovT"); - delete assembler; + EmitAndCheck(&assembler, "MovWMovT"); } TEST(Thumb2AssemblerTest, SpecialAddSub) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ add(R2, SP, ShifterOperand(0x50)); // 16 bit. __ add(SP, SP, ShifterOperand(0x50)); // 16 bit. @@ -792,16 +720,11 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SpecialAddSub"); - delete assembler; + EmitAndCheck(&assembler, "SpecialAddSub"); } TEST(Thumb2AssemblerTest, StoreToOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. 
@@ -809,17 +732,12 @@ TEST(Thumb2AssemblerTest, StoreToOffset) { __ StoreToOffset(kStoreHalfword, R0, R12, 12); __ StoreToOffset(kStoreByte, R2, R12, 12); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreToOffset"); - delete assembler; + EmitAndCheck(&assembler, "StoreToOffset"); } TEST(Thumb2AssemblerTest, IfThen) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ it(EQ); __ mov(R1, ShifterOperand(1), EQ); @@ -848,16 +766,11 @@ TEST(Thumb2AssemblerTest, IfThen) { __ mov(R3, ShifterOperand(3), EQ); __ mov(R4, ShifterOperand(4), NE); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "IfThen"); - delete assembler; + EmitAndCheck(&assembler, "IfThen"); } TEST(Thumb2AssemblerTest, CbzCbnz) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R2, &l1); @@ -873,16 +786,11 @@ TEST(Thumb2AssemblerTest, CbzCbnz) { __ Bind(&l2); __ mov(R2, ShifterOperand(4)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CbzCbnz"); - delete assembler; + EmitAndCheck(&assembler, "CbzCbnz"); } TEST(Thumb2AssemblerTest, Multiply) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mul(R0, R1, R0); __ mul(R0, R1, R2); @@ -898,16 +806,11 @@ TEST(Thumb2AssemblerTest, Multiply) { __ umull(R0, R1, R2, R3); __ umull(R8, R9, R10, R11); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Multiply"); - delete assembler; + EmitAndCheck(&assembler, "Multiply"); } TEST(Thumb2AssemblerTest, Divide) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ sdiv(R0, R1, R2); __ sdiv(R8, R9, R10); @@ -915,16 +818,11 @@ TEST(Thumb2AssemblerTest, Divide) { __ udiv(R0, R1, R2); __ udiv(R8, R9, R10); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Divide"); - delete assembler; + EmitAndCheck(&assembler, "Divide"); } TEST(Thumb2AssemblerTest, VMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vmovs(S1, 1.0); __ vmovd(D1, 1.0); @@ -932,17 +830,12 @@ TEST(Thumb2AssemblerTest, VMov) { __ vmovs(S1, S2); __ vmovd(D1, D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VMov"); - delete assembler; + EmitAndCheck(&assembler, "VMov"); } TEST(Thumb2AssemblerTest, BasicFloatingPoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vadds(S0, S1, S2); __ vsubs(S0, S1, S2); @@ -964,16 +857,11 @@ 
TEST(Thumb2AssemblerTest, BasicFloatingPoint) { __ vnegd(D0, D1); __ vsqrtd(D0, D1); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicFloatingPoint"); - delete assembler; + EmitAndCheck(&assembler, "BasicFloatingPoint"); } TEST(Thumb2AssemblerTest, FloatingPointConversions) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcvtsd(S2, D2); __ vcvtds(D2, S2); @@ -990,16 +878,11 @@ TEST(Thumb2AssemblerTest, FloatingPointConversions) { __ vcvtud(S1, D2); __ vcvtdu(D1, S2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointConversions"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointConversions"); } TEST(Thumb2AssemblerTest, FloatingPointComparisons) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcmps(S0, S1); __ vcmpd(D0, D1); @@ -1007,57 +890,37 @@ TEST(Thumb2AssemblerTest, FloatingPointComparisons) { __ vcmpsz(S2); __ vcmpdz(D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointComparisons"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointComparisons"); } TEST(Thumb2AssemblerTest, Calls) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ blx(LR); __ bx(LR); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Calls"); - delete assembler; + EmitAndCheck(&assembler, "Calls"); } TEST(Thumb2AssemblerTest, Breakpoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ bkpt(0); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Breakpoint"); - delete assembler; + EmitAndCheck(&assembler, "Breakpoint"); } TEST(Thumb2AssemblerTest, StrR1) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R1, Address(SP, 68)); __ str(R1, Address(SP, 1068)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StrR1"); - delete assembler; + EmitAndCheck(&assembler, "StrR1"); } TEST(Thumb2AssemblerTest, VPushPop) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vpushs(S2, 4); __ vpushd(D2, 4); @@ -1065,16 +928,11 @@ TEST(Thumb2AssemblerTest, VPushPop) { __ vpops(S2, 4); __ vpopd(D2, 4); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VPushPop"); - delete assembler; + EmitAndCheck(&assembler, "VPushPop"); } 
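Every hunk in this test file follows the same shape: the repeated emit-and-dump tail of each test (CodeSize, the managed_code vector, FinalizeInstructions, dump, delete) is folded into a single EmitAndCheck(&assembler, "TestName") call on a stack-allocated Thumb2Assembler, and the trailing delete disappears because the assembler now has automatic storage. The helper itself is presumably added earlier in this change and is not shown in this excerpt; the sketch below is only a reconstruction from the deleted boilerplate, and the exact signature, the FinalizeCode() call, and the reuse of dump() are assumptions.

  // Sketch only, not the helper from this change: reconstructed from the
  // boilerplate the hunks above delete. Signature and the FinalizeCode() call
  // are assumptions.
  void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname) {
    assembler->FinalizeCode();  // assumed: fix up branches/literal pools before sizing.

    size_t cs = assembler->CodeSize();
    std::vector<uint8_t> managed_code(cs);
    MemoryRegion code(&managed_code[0], managed_code.size());
    assembler->FinalizeInstructions(code);

    dump(managed_code, testname);  // compare the emitted bytes against the expected output table.
  }
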
TEST(Thumb2AssemblerTest, Max16BitBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1084,16 +942,11 @@ TEST(Thumb2AssemblerTest, Max16BitBranch) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Max16BitBranch"); - delete assembler; + EmitAndCheck(&assembler, "Max16BitBranch"); } TEST(Thumb2AssemblerTest, Branch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1103,16 +956,11 @@ TEST(Thumb2AssemblerTest, Branch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Branch32"); - delete assembler; + EmitAndCheck(&assembler, "Branch32"); } TEST(Thumb2AssemblerTest, CompareAndBranchMax) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1122,16 +970,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchMax) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchMax"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchMax"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1141,16 +984,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation16"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation16"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1160,16 +998,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation32"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation32"); } TEST(Thumb2AssemblerTest, MixedBranch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; Label l2; @@ -1184,16 +1017,11 @@ TEST(Thumb2AssemblerTest, MixedBranch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MixedBranch32"); - 
delete assembler; + EmitAndCheck(&assembler, "MixedBranch32"); } TEST(Thumb2AssemblerTest, Shifts) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit __ Lsl(R0, R1, 5); @@ -1240,16 +1068,11 @@ TEST(Thumb2AssemblerTest, Shifts) { __ Lsr(R0, R8, R2, true); __ Asr(R0, R1, R8, true); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Shifts"); - delete assembler; + EmitAndCheck(&assembler, "Shifts"); } TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ ldr(R0, Address(R1, R2)); @@ -1272,16 +1095,11 @@ TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { __ ldr(R0, Address(R1, R8)); __ str(R0, Address(R1, R8)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreRegOffset"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreRegOffset"); } TEST(Thumb2AssemblerTest, LoadStoreLiteral) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(4)); __ str(R0, Address(4)); @@ -1295,16 +1113,11 @@ TEST(Thumb2AssemblerTest, LoadStoreLiteral) { __ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)). __ str(R0, Address(0x7ff)); // 11 bits (32 bit). - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLiteral"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLiteral"); } TEST(Thumb2AssemblerTest, LoadStoreLimits) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(R4, 124)); // 16 bit. __ ldr(R0, Address(R4, 128)); // 32 bit. @@ -1330,30 +1143,20 @@ TEST(Thumb2AssemblerTest, LoadStoreLimits) { __ strh(R0, Address(R4, 62)); // 16 bit. __ strh(R0, Address(R4, 64)); // 32 bit. 
- size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLimits"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLimits"); } TEST(Thumb2AssemblerTest, CompareAndBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; - arm::NearLabel label; + Label label; __ CompareAndBranchIfZero(arm::R0, &label); __ CompareAndBranchIfZero(arm::R11, &label); __ CompareAndBranchIfNonZero(arm::R0, &label); __ CompareAndBranchIfNonZero(arm::R11, &label); __ Bind(&label); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranch"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranch"); } #undef __ diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 841d6a00c0..280ed779b3 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -4832,7 +4832,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -std::map<std::string, const char**> test_results; +std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; test_results["SimpleMov32"] = SimpleMov32Results; |
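The final hunk widens the value type of test_results from const char** to const char* const*, which reads as a const-correctness tightening: lookups can no longer be used to overwrite the expected strings, while the existing table assignments keep compiling because adding const at the inner pointer level is an implicit qualification conversion. A standalone illustration, with a hypothetical FooResults table standing in for the generated ones:

  #include <map>
  #include <string>

  // Hypothetical expected-output table in the style of the .cc.inc file.
  const char* FooResults[] = { "mov r0, #1", nullptr };

  std::map<std::string, const char* const*> test_results;

  void setup() {
    test_results["Foo"] = FooResults;            // const char** -> const char* const* converts implicitly.
    const char* const* r = test_results["Foo"];
    // r[0] = "changed";                         // would no longer compile: stored pointers are read-only.
    (void)r;
  }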