diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/driver/compiler_driver.cc | 3 | ||||
| -rw-r--r-- | compiler/image_test.cc | 6 | ||||
| -rw-r--r-- | compiler/image_writer.cc | 18 | ||||
| -rw-r--r-- | compiler/image_writer.h | 8 | ||||
| -rw-r--r-- | compiler/jit/jit_compiler.cc | 5 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 81 | ||||
| -rw-r--r-- | compiler/optimizing/inliner.cc | 20 | ||||
| -rw-r--r-- | compiler/optimizing/load_store_elimination.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.cc | 29 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 3 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 4 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm.cc | 21 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm.h | 73 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm32.cc | 32 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm32.h | 7 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.cc | 206 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 39 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_thumb2_test.cc | 310 | ||||
| -rw-r--r-- | compiler/utils/assembler.h | 4 |
19 files changed, 813 insertions, 58 deletions
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index fb116bb3da..d055b37ea7 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -697,6 +697,9 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t } CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) { + DCHECK_EQ(method, + method->GetInterfaceMethodIfProxy( + Runtime::Current()->GetClassLinker()->GetImagePointerSize())); const uint32_t method_idx = method->GetDexMethodIndex(); const uint32_t access_flags = method->GetAccessFlags(); const InvokeType invoke_type = method->GetInvokeType(); diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 21d582eec4..fd6cd82f7c 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -97,8 +97,10 @@ TEST_F(ImageTest, WriteRead) { ASSERT_TRUE(dup_oat.get() != nullptr); { - bool success_image = - writer->Write(image_file.GetFilename(), dup_oat->GetPath(), dup_oat->GetPath()); + bool success_image = writer->Write(kInvalidImageFd, + image_file.GetFilename(), + dup_oat->GetPath(), + dup_oat->GetPath()); ASSERT_TRUE(success_image); bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin()); ASSERT_TRUE(success_fixup); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 0e5a97ffbd..af2a4f9426 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -122,7 +122,8 @@ bool ImageWriter::PrepareImageAddressSpace() { return true; } -bool ImageWriter::Write(const std::string& image_filename, +bool ImageWriter::Write(int image_fd, + const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) { CHECK(!image_filename.empty()); @@ -178,10 +179,13 @@ bool ImageWriter::Write(const std::string& image_filename, LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location; return false; } - - std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str())); - ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - if (image_file.get() == nullptr) { + std::unique_ptr<File> image_file; + if (image_fd != kInvalidImageFd) { + image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage)); + } else { + image_file.reset(OS::CreateEmptyFile(image_filename.c_str())); + } + if (image_file == nullptr) { LOG(ERROR) << "Failed to open image file " << image_filename; return false; } @@ -192,6 +196,7 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image + fields + methods. + ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); const auto write_count = image_header->GetImageSize(); if (!image_file->WriteFully(image_->Begin(), write_count)) { PLOG(ERROR) << "Failed to write image file " << image_filename; @@ -200,7 +205,8 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image bitmap at the page aligned start of the image end. - const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageSection( + ImageHeader::kSectionImageBitmap); CHECK_ALIGNED(bitmap_section.Offset(), kPageSize); if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), bitmap_section.Size(), bitmap_section.Offset())) { diff --git a/compiler/image_writer.h b/compiler/image_writer.h index e235bc4553..7a2febcea1 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -41,6 +41,8 @@ namespace art { +static constexpr int kInvalidImageFd = -1; + // Write a Space built during compilation for use during execution. class ImageWriter FINAL { public: @@ -89,7 +91,11 @@ class ImageWriter FINAL { uint8_t* GetOatFileBegin() const; - bool Write(const std::string& image_filename, const std::string& oat_filename, + // If image_fd is not kInvalidImageFd, then we use that for the file. Otherwise we open + // image_filename. + bool Write(int image_fd, + const std::string& image_filename, + const std::string& oat_filename, const std::string& oat_location) REQUIRES(!Locks::mutator_lock_); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index c1b87c9cd0..d520208d32 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -192,7 +192,10 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { CompiledMethod* compiled_method = nullptr; { TimingLogger::ScopedTiming t2("Compiling", &logger); - compiled_method = compiler_driver_->CompileArtMethod(self, method); + // If we get a request to compile a proxy method, we pass the actual Java method + // of that proxy method, as the compiler does not expect a proxy method. + ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); + compiled_method = compiler_driver_->CompileArtMethod(self, method_to_compile); } // Trim maps to reduce memory usage. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8d9794bd79..3dc3b7fba0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -56,6 +56,8 @@ static constexpr SRegister kFpuCalleeSaves[] = // S registers. Therefore there is no need to block it. static constexpr DRegister DTMP = D31; +static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() @@ -513,17 +515,6 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { uint32_t new_position = __ GetAdjustedPosition(old_position); stack_map_stream_.SetStackMapNativePcOffset(i, new_position); } - // Adjust native pc offsets of block labels. - for (HBasicBlock* block : *block_order_) { - // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid - // FirstNonEmptyBlock() which could lead to adjusting a label more than once. - DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size()); - Label* block_label = &block_labels_[block->GetBlockId()]; - DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump()); - if (block_label->IsBound()) { - __ AdjustLabelPosition(block_label); - } - } // Adjust pc offsets for the disassembly information. if (disasm_info_ != nullptr) { GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); @@ -538,10 +529,6 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); } } - // Adjust pc offsets for relative call patches. - for (MethodPatchInfo<Label>& info : relative_call_patches_) { - __ AdjustLabelPosition(&info.label); - } CodeGenerator::Finalize(allocator); } @@ -732,7 +719,8 @@ void CodeGeneratorARM::GenerateFrameExit() { } void CodeGeneratorARM::Bind(HBasicBlock* block) { - __ Bind(GetLabelOf(block)); + Label* label = GetLabelOf(block); + __ BindTrackedLabel(label); } Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { @@ -5255,7 +5243,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: relative_call_patches_.emplace_back(invoke->GetTargetMethod()); - __ Bind(&relative_call_patches_.back().label); + __ BindTrackedLabel(&relative_call_patches_.back().label); // Arbitrarily branch to the BL itself, override at link time. __ bl(&relative_call_patches_.back().label); break; @@ -5378,25 +5366,64 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); + if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold && + codegen_->GetAssembler()->IsThumb()) { + locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base. + if (switch_instr->GetStartValue() != 0) { + locations->AddTemp(Location::RequiresRegister()); // We need a temp for the bias. + } + } } void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - GenerateCompareWithImmediate(value_reg, lower_bound + i); - __ b(codegen_->GetLabelOf(successors[i]), EQ); - } + if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) { + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + GenerateCompareWithImmediate(value_reg, lower_bound + i); + __ b(codegen_->GetLabelOf(successors[i]), EQ); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ b(codegen_->GetLabelOf(default_block)); + } + } else { + // Create a table lookup. + Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); + + // Materialize a pointer to the switch table + std::vector<Label*> labels(num_entries); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels), temp_reg); + + // Remove the bias. + Register key_reg; + if (lower_bound != 0) { + key_reg = locations->GetTemp(1).AsRegister<Register>(); + __ AddConstant(key_reg, value_reg, -lower_bound); + } else { + key_reg = value_reg; + } + + // Check whether the value is in the table, jump to default block if not. + __ CmpConstant(key_reg, num_entries - 1); + __ b(codegen_->GetLabelOf(default_block), Condition::HI); + + // Load the displacement from the table. + __ ldr(temp_reg, Address(temp_reg, key_reg, Shift::LSL, 2)); - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ b(codegen_->GetLabelOf(default_block)); + // Dispatch is a direct add to the PC (for Thumb2). + __ EmitJumpTableDispatch(table, temp_reg); } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 0aaa6b3f2c..353881e47a 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -494,6 +494,26 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, << " it is in a different dex file and requires access to the dex cache"; return false; } + + if (current->IsNewInstance() && + (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) { + // Allocation entrypoint does not handle inlined frames. + return false; + } + + if (current->IsNewArray() && + (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) { + // Allocation entrypoint does not handle inlined frames. + return false; + } + + if (current->IsUnresolvedStaticFieldGet() || + current->IsUnresolvedInstanceFieldGet() || + current->IsUnresolvedStaticFieldSet() || + current->IsUnresolvedInstanceFieldSet()) { + // Entrypoint for unresolved fields does not handle inlined frames. + return false; + } } } number_of_inlined_instructions_ += number_of_instructions; diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 90f28e511e..6fbb6823d6 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -59,7 +59,7 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) || (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) || (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) || - (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(1))) || + (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) || (use->IsArraySet() && (reference_ == use->InputAt(2)))) { // reference_ is merged to a phi, passed to a callee, or stored to heap. // reference_ isn't the only name that can refer to its value anymore. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 8b28ff91d4..68fb0acf7f 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1652,7 +1652,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the meta information surrounding blocks: // (1) the graph they are now in, // (2) the reverse post order of that graph, - // (3) the potential loop information they are now in. + // (3) the potential loop information they are now in, + // (4) try block membership. // We don't add the entry block, the exit block, and the first block, which // has been merged with `at`. @@ -1668,41 +1669,47 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { size_t index_of_at = IndexOfElement(outer_graph->reverse_post_order_, at); MakeRoomFor(&outer_graph->reverse_post_order_, blocks_added, index_of_at); - // Do a reverse post order of the blocks in the callee and do (1), (2), - // and (3) to the blocks that apply. - HLoopInformation* info = at->GetLoopInformation(); + HLoopInformation* loop_info = at->GetLoopInformation(); + // Copy TryCatchInformation if `at` is a try block, not if it is a catch block. + TryCatchInformation* try_catch_info = at->IsTryBlock() ? at->GetTryCatchInformation() : nullptr; + + // Do a reverse post order of the blocks in the callee and do (1), (2), (3) + // and (4) to the blocks that apply. for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { DCHECK(!current->IsInLoop()); + DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - if (info != nullptr) { - current->SetLoopInformation(info); + if (loop_info != nullptr) { + current->SetLoopInformation(loop_info); for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(current); } } + current->SetTryCatchInformation(try_catch_info); } } - // Do (1), (2), and (3) to `to`. + // Do (1), (2), (3) and (4) to `to`. to->SetGraph(outer_graph); outer_graph->AddBlock(to); outer_graph->reverse_post_order_[++index_of_at] = to; - if (info != nullptr) { - to->SetLoopInformation(info); + if (loop_info != nullptr) { + to->SetLoopInformation(loop_info); for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(to); } - if (info->IsBackEdge(*at)) { + if (loop_info->IsBackEdge(*at)) { // Only `to` can become a back edge, as the inlined blocks // are predecessors of `to`. - info->ReplaceBackEdge(at, to); + loop_info->ReplaceBackEdge(at, to); } } + to->SetTryCatchInformation(try_catch_info); } // Update the next instruction id of the outer graph, so that instructions diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7df586692b..0f2c1cffee 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -4750,6 +4750,9 @@ class HLoadClass : public HExpression<1> { return generate_clinit_check_; } void SetMustGenerateClinitCheck(bool generate_clinit_check) { + // The entrypoint the code generator is going to call does not do + // clinit of the class. + DCHECK(!NeedsAccessCheck()); generate_clinit_check_ = generate_clinit_check; } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 6632f95ebe..8cb2cfc816 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -492,6 +492,8 @@ static void RunOptimizations(HGraph* graph, RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); + // TODO: Update passes incompatible with try/catch so we have the same // pipeline for all methods. if (graph->HasTryCatch()) { @@ -507,8 +509,6 @@ static void RunOptimizations(HGraph* graph, RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); } else { - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); - HOptimization* optimizations2[] = { // BooleanSimplifier depends on the InstructionSimplifier removing // redundant suspend checks to recognize empty blocks. diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 807bedaa04..68e39568bb 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -16,6 +16,8 @@ #include "assembler_arm.h" +#include <algorithm> + #include "base/bit_utils.h" #include "base/logging.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -922,5 +924,24 @@ uint32_t ArmAssembler::ModifiedImmediate(uint32_t value) { return value | i << 26 | imm3 << 12 | a << 7; } +void ArmAssembler::FinalizeTrackedLabels() { + if (!tracked_labels_.empty()) { + // This array should be sorted, as assembly is generated in linearized order. It isn't + // technically required, but GetAdjustedPosition() used in AdjustLabelPosition() can take + // advantage of it. So ensure that it's actually the case. + DCHECK(std::is_sorted( + tracked_labels_.begin(), + tracked_labels_.end(), + [](const Label* lhs, const Label* rhs) { return lhs->Position() < rhs->Position(); })); + + Label* last_label = nullptr; // Track duplicates, we must not adjust twice. + for (Label* label : tracked_labels_) { + DCHECK_NE(label, last_label); + AdjustLabelPosition(label); + last_label = label; + } + } +} + } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index d59bc6be40..4a6e6d7c3f 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -77,6 +77,45 @@ class Literal { DISALLOW_COPY_AND_ASSIGN(Literal); }; +// Jump table: table of labels emitted after the literals. Similar to literals. +class JumpTable { + public: + explicit JumpTable(std::vector<Label*>&& labels) + : label_(), anchor_label_(), labels_(std::move(labels)) { + } + + uint32_t GetSize() const { + return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t); + } + + const std::vector<Label*>& GetData() const { + return labels_; + } + + Label* GetLabel() { + return &label_; + } + + const Label* GetLabel() const { + return &label_; + } + + Label* GetAnchorLabel() { + return &anchor_label_; + } + + const Label* GetAnchorLabel() const { + return &anchor_label_; + } + + private: + Label label_; + Label anchor_label_; + std::vector<Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + class ShifterOperand { public: ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister), @@ -685,6 +724,8 @@ class ArmAssembler : public Assembler { AddConstant(rd, rd, value, cond, set_cc); } + virtual void CmpConstant(Register rn, int32_t value, Condition cond = AL) = 0; + // Load and Store. May clobber IP. virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0; void LoadSImmediate(SRegister sd, float value, Condition cond = AL) { @@ -996,11 +1037,43 @@ class ArmAssembler : public Assembler { b(label); } + // Jump table support. This is split into three functions: + // + // * CreateJumpTable creates the internal metadata to track the jump targets, and emits code to + // load the base address of the jump table. + // + // * EmitJumpTableDispatch emits the code to actually jump, assuming that the right table value + // has been loaded into a register already. + // + // * FinalizeTables emits the jump table into the literal pool. This can only be called after the + // labels for the jump targets have been finalized. + + // Create a jump table for the given labels that will be emitted when finalizing. Create a load + // sequence (or placeholder) that stores the base address into the given register. When the table + // is emitted, offsets will be relative to the location EmitJumpTableDispatch was called on (the + // anchor). + virtual JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) = 0; + + // Emit the jump-table jump, assuming that the right value was loaded into displacement_reg. + virtual void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) = 0; + + // Bind a Label that needs to be updated by the assembler in FinalizeCode() if its position + // changes due to branch/literal fixup. + void BindTrackedLabel(Label* label) { + Bind(label); + tracked_labels_.push_back(label); + } + protected: // Returns whether or not the given register is used for passing parameters. static int RegisterCompare(const Register* reg1, const Register* reg2) { return *reg1 - *reg2; } + + void FinalizeTrackedLabels(); + + // Tracked labels. Use a vector, as we need to sort before adjusting. + std::vector<Label*> tracked_labels_; }; // Slowpath entered when Thread::Current()->_exception is non-null diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 6e7c828b4a..a7dbacd3a9 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1385,6 +1385,21 @@ void Arm32Assembler::AddConstant(Register rd, Register rn, int32_t value, } } +void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { + ShifterOperand shifter_op; + if (ShifterOperandCanHoldArm32(value, &shifter_op)) { + cmp(rn, shifter_op, cond); + } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) { + cmn(rn, shifter_op, cond); + } else { + movw(IP, Low16Bits(value), cond); + uint16_t value_high = High16Bits(value); + if (value_high != 0) { + movt(IP, value_high, cond); + } + cmp(rn, ShifterOperand(IP), cond); + } +} void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) { ShifterOperand shifter_op; @@ -1584,6 +1599,23 @@ void Arm32Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { b(label, NE); } +JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED, + Register base_reg ATTRIBUTE_UNUSED) { + LOG(FATAL) << "CreateJumpTable is not supported on ARM32"; + UNREACHABLE(); +} + +void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED, + Register displacement_reg ATTRIBUTE_UNUSED) { + LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32"; + UNREACHABLE(); +} + +void Arm32Assembler::FinalizeCode() { + ArmAssembler::FinalizeCode(); + // Currently the arm32 assembler does not support fixups, and thus no tracking. We must not call + // FinalizeTrackedLabels(), which would lead to an abort. +} } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 4646538716..5233dcbbb0 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -261,6 +261,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void AddConstant(Register rd, Register rn, int32_t value, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; + void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE; + // Load and Store. May clobber IP. void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; @@ -308,6 +310,11 @@ class Arm32Assembler FINAL : public ArmAssembler { void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; + void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE; + + void FinalizeCode() OVERRIDE; + private: void EmitType01(Condition cond, int type, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index cc87856e82..fb3aa1ea85 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -92,7 +92,7 @@ void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { label->BindTo(bound_pc); } -void Thumb2Assembler::BindLiterals() { +uint32_t Thumb2Assembler::BindLiterals() { // We don't add the padding here, that's done only after adjusting the Fixup sizes. uint32_t code_size = buffer_.Size(); for (Literal& lit : literals_) { @@ -100,6 +100,15 @@ void Thumb2Assembler::BindLiterals() { BindLabel(label, code_size); code_size += lit.GetSize(); } + return code_size; +} + +void Thumb2Assembler::BindJumpTables(uint32_t code_size) { + for (JumpTable& table : jump_tables_) { + Label* label = table.GetLabel(); + BindLabel(label, code_size); + code_size += table.GetSize(); + } } void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, @@ -144,7 +153,7 @@ uint32_t Thumb2Assembler::AdjustFixups() { AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); } while (!fixups_to_recalculate.empty()); - if ((current_code_size & 2) != 0 && !literals_.empty()) { + if ((current_code_size & 2) != 0 && (!literals_.empty() || !jump_tables_.empty())) { // If we need to add padding before literals, this may just push some out of range, // so recalculate all load literals. This makes up for the fact that we don't mark // load literal as a dependency of all previous Fixups even though it actually is. @@ -173,6 +182,13 @@ uint32_t Thumb2Assembler::AdjustFixups() { label->Reinitialize(); label->BindTo(old_position + literals_adjustment); } + for (JumpTable& table : jump_tables_) { + Label* label = table.GetLabel(); + DCHECK(label->IsBound()); + int old_position = label->Position(); + label->Reinitialize(); + label->BindTo(old_position + literals_adjustment); + } } return current_code_size; @@ -229,6 +245,43 @@ void Thumb2Assembler::EmitLiterals() { } } +void Thumb2Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + // Jump tables require 4 byte alignment. (We don't support byte and half-word jump tables.) + uint32_t code_size = buffer_.Size(); + DCHECK_ALIGNED(code_size, 2); + if ((code_size & 2u) != 0u) { + Emit16(0); + } + for (JumpTable& table : jump_tables_) { + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + buffer_.ExtendCapacity(orig_size + table.GetSize()); +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + DCHECK_EQ(static_cast<size_t>(table.GetLabel()->Position()), buffer_.Size()); + int32_t anchor_position = table.GetAnchorLabel()->Position() + 4; + + for (Label* target : table.GetData()) { + // Ensure that the label was tracked, so that it will have the right position. + DCHECK(std::find(tracked_labels_.begin(), tracked_labels_.end(), target) != + tracked_labels_.end()); + + int32_t offset = target->Position() - anchor_position; + buffer_.Emit<int32_t>(offset); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + size_t new_size = buffer_.Size(); + DCHECK_LE(new_size - orig_size, table.GetSize()); + } + } +} + inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { DCHECK_ALIGNED(offset, 2); int16_t encoding = B15 | B14; @@ -382,12 +435,34 @@ inline int32_t Thumb2Assembler::LdrRtRnImm12Encoding(Register rt, Register rn, i return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset; } +inline int16_t Thumb2Assembler::AdrEncoding16(Register rd, int32_t offset) { + DCHECK(IsUint<10>(offset)); + DCHECK(IsAligned<4>(offset)); + DCHECK(!IsHighRegister(rd)); + return B15 | B13 | (rd << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::AdrEncoding32(Register rd, int32_t offset) { + DCHECK(IsUint<12>(offset)); + // Bit 26: offset[11] + // Bits 14-12: offset[10-8] + // Bits 7-0: offset[7-0] + int32_t immediate_mask = + ((offset & (1 << 11)) << (26 - 11)) | + ((offset & (7 << 8)) << (12 - 8)) | + (offset & 0xFF); + return B31 | B30 | B29 | B28 | B25 | B19 | B18 | B17 | B16 | (rd << 8) | immediate_mask; +} + void Thumb2Assembler::FinalizeCode() { ArmAssembler::FinalizeCode(); - BindLiterals(); + uint32_t size_after_literals = BindLiterals(); + BindJumpTables(size_after_literals); uint32_t adjusted_code_size = AdjustFixups(); EmitFixups(adjusted_code_size); EmitLiterals(); + FinalizeTrackedLabels(); + EmitJumpTables(); } bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) { @@ -1770,6 +1845,15 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kLiteralFar: return 14u; + case kLiteralAddr1KiB: + return 2u; + case kLiteralAddr4KiB: + return 4u; + case kLiteralAddr64KiB: + return 6u; + case kLiteralAddrFar: + return 10u; + case kLongOrFPLiteral1KiB: return 4u; case kLongOrFPLiteral256KiB: @@ -1831,6 +1915,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con case kLiteral1KiB: case kLiteral4KiB: case kLongOrFPLiteral1KiB: + case kLiteralAddr1KiB: + case kLiteralAddr4KiB: DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2)); diff += LiteralPoolPaddingSize(current_code_size); // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC @@ -1843,12 +1929,14 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con case kLiteral1MiB: case kLiteral64KiB: case kLongOrFPLiteral256KiB: + case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. diff -= 4; // One extra 32-bit MOV. diff += LiteralPoolPaddingSize(current_code_size); break; case kLiteralFar: case kLongOrFPLiteralFar: + case kLiteralAddrFar: DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC. diff -= 8; // Extra MOVW+MOVT; both 32-bit. diff += LiteralPoolPaddingSize(current_code_size); @@ -1929,6 +2017,29 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) // This encoding can reach any target. break; + case kLiteralAddr1KiB: + DCHECK(!IsHighRegister(rn_)); + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralAddr4KiB); + FALLTHROUGH_INTENDED; + case kLiteralAddr4KiB: + if (IsUint<12>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralAddr64KiB); + FALLTHROUGH_INTENDED; + case kLiteralAddr64KiB: + if (IsUint<16>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralAddrFar); + FALLTHROUGH_INTENDED; + case kLiteralAddrFar: + // This encoding can reach any target. + break; + case kLongOrFPLiteral1KiB: if (IsUint<10>(GetOffset(current_code_size))) { break; @@ -2055,6 +2166,42 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c break; } + case kLiteralAddr1KiB: { + DCHECK(type_ == kLoadLiteralAddr); + int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(location_, encoding); + break; + } + case kLiteralAddr4KiB: { + DCHECK(type_ == kLoadLiteralAddr); + int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLiteralAddr64KiB: { + DCHECK(type_ == kLoadLiteralAddr); + int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + break; + } + case kLiteralAddrFar: { + DCHECK(type_ == kLoadLiteralAddr); + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + break; + } + case kLongOrFPLiteral1KiB: { int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. buffer->Store<int16_t>(location_, encoding >> 16); @@ -3260,6 +3407,25 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, } } +void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { + // We prefer to select the shorter code sequence rather than selecting add for + // positive values and sub for negatives ones, which would slightly improve + // the readability of generated code for some constants. + ShifterOperand shifter_op; + if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) { + cmp(rn, shifter_op, cond); + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) { + cmn(rn, shifter_op, cond); + } else { + CHECK(rn != IP); + movw(IP, Low16Bits(value), cond); + uint16_t value_high = High16Bits(value); + if (value_high != 0) { + movt(IP, value_high, cond); + } + cmp(rn, ShifterOperand(IP), cond); + } +} void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) { ShifterOperand shifter_op; @@ -3476,5 +3642,39 @@ void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { b(label, NE); } } + +JumpTable* Thumb2Assembler::CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + + bool use32bit = IsForced32Bit() || IsHighRegister(base_reg); + uint32_t location = buffer_.Size(); + Fixup::Size size = use32bit ? Fixup::kLiteralAddr4KiB : Fixup::kLiteralAddr1KiB; + FixupId fixup_id = AddFixup(Fixup::LoadLiteralAddress(location, base_reg, size)); + Emit16(static_cast<uint16_t>(table->GetLabel()->position_)); + table->GetLabel()->LinkTo(fixup_id); + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); + + return table; +} + +void Thumb2Assembler::EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) { + CHECK(!IsForced32Bit()) << "Forced 32-bit dispatch not implemented yet"; + // 32-bit ADD doesn't support PC as an input, so we need a two-instruction sequence: + // SUB ip, ip, #0 + // ADD pc, ip, reg + // TODO: Implement. + + // The anchor's position needs to be fixed up before we can compute offsets - so make it a tracked + // label. + BindTrackedLabel(jump_table->GetAnchorLabel()); + + add(PC, PC, ShifterOperand(displacement_reg)); +} + } // namespace arm } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 055b1379ad..38fd244087 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #include <deque> +#include <utility> #include <vector> #include "base/logging.h" @@ -304,6 +305,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void AddConstant(Register rd, Register rn, int32_t value, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; + void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE; + // Load and Store. May clobber IP. void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void MarkExceptionHandler(Label* label) OVERRIDE; @@ -358,6 +361,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { force_32bit_ = true; } + // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This + // will generate a fixup. + JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; + // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup. + void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE; + private: typedef uint16_t FixupId; @@ -399,6 +408,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { kCompareAndBranchXZero, // cbz/cbnz. kLoadLiteralNarrow, // Load narrrow integer literal. kLoadLiteralWide, // Load wide integer literal. + kLoadLiteralAddr, // Load address of literal (used for jump table). kLoadFPLiteralSingle, // Load FP literal single. kLoadFPLiteralDouble, // Load FP literal double. }; @@ -429,6 +439,16 @@ class Thumb2Assembler FINAL : public ArmAssembler { // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes. kLiteralFar, + // Load literal base addr. + // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes. + kLiteralAddr1KiB, + // ADR rX, label; 4KiB offset. 4 bytes. + kLiteralAddr4KiB, + // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes. + kLiteralAddr64KiB, + // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes. + kLiteralAddrFar, + // Load long or FP literal variants. // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. kLongOrFPLiteral1KiB, @@ -457,7 +477,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { } // Load narrow literal. - static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) { + static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || size == kLiteral1MiB || size == kLiteralFar); DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); @@ -493,6 +513,14 @@ class Thumb2Assembler FINAL : public ArmAssembler { AL, kLoadFPLiteralDouble, size, location); } + static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) { + DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB || + size == kLiteralAddrFar); + DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB); + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadLiteralAddr, size, location); + } + Type GetType() const { return type_; } @@ -756,12 +784,14 @@ class Thumb2Assembler FINAL : public ArmAssembler { } void BindLabel(Label* label, uint32_t bound_pc); - void BindLiterals(); + uint32_t BindLiterals(); + void BindJumpTables(uint32_t code_size); void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, std::deque<FixupId>* fixups_to_recalculate); uint32_t AdjustFixups(); void EmitFixups(uint32_t adjusted_code_size); void EmitLiterals(); + void EmitJumpTables(); static int16_t BEncoding16(int32_t offset, Condition cond); static int32_t BEncoding32(int32_t offset, Condition cond); @@ -778,6 +808,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset); static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset); static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); + static int16_t AdrEncoding16(Register rd, int32_t offset); + static int32_t AdrEncoding32(Register rd, int32_t offset); std::vector<Fixup> fixups_; std::unique_ptr<FixupId[]> fixup_dependents_; @@ -786,6 +818,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { // without invalidating pointers and references to existing elements. std::deque<Literal> literals_; + // Jump table list. + std::deque<JumpTable> jump_tables_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 9c08ce017e..cb4b20b5ba 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -17,6 +17,7 @@ #include "assembler_thumb2.h" #include "base/stl_util.h" +#include "base/stringprintf.h" #include "utils/assembler_test.h" namespace art { @@ -1011,6 +1012,315 @@ TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiBDueToAlignmentOnSecondPass) __ GetAdjustedPosition(label.Position())); } +TEST_F(AssemblerThumb2Test, BindTrackedLabel) { + Label non_tracked, tracked, branch_target; + + // A few dummy loads on entry. + constexpr size_t kLdrR0R0Count = 5; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // A branch that will need to be fixed up. + __ cbz(arm::R0, &branch_target); + + // Some more dummy loads. + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Now insert tracked and untracked label. + __ Bind(&non_tracked); + __ BindTrackedLabel(&tracked); + + // A lot of dummy loads, to ensure the branch needs resizing. + constexpr size_t kLdrR0R0CountLong = 60; + for (size_t i = 0; i != kLdrR0R0CountLong; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Bind the branch target. + __ Bind(&branch_target); + + // One more load. + __ ldr(arm::R0, arm::Address(arm::R0)); + + std::string expected = + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + "cmp r0, #0\n" // cbz r0, 1f + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count + kLdrR0R0CountLong, "ldr r0, [r0]\n") + + "1:\n" + "ldr r0, [r0]\n"; + DriverStr(expected, "BindTrackedLabel"); + + // Expectation is that the tracked label should have moved. + EXPECT_LT(non_tracked.Position(), tracked.Position()); +} + +TEST_F(AssemblerThumb2Test, JumpTable) { + // The jump table. Use three labels. + Label label1, label2, label3; + std::vector<Label*> labels({ &label1, &label2, &label3 }); + + // A few dummy loads on entry, interspersed with 2 labels. + constexpr size_t kLdrR0R0Count = 5; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label1); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label2); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Create the jump table, emit the base load. + arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1); + + // Dummy computation, stand-in for the address. We're only testing the jump table here, not how + // it's being used. + __ ldr(arm::R0, arm::Address(arm::R0)); + + // Emit the jump + __ EmitJumpTableDispatch(jump_table, arm::R1); + + // Some more dummy instructions. + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label3); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { // Note: odd so there's no alignment + __ ldr(arm::R0, arm::Address(arm::R0)); // necessary, as gcc as emits nops, + } // whereas we emit 0 != nop. + + static_assert((kLdrR0R0Count + 3) * 2 < 1 * KB, "Too much offset"); + + std::string expected = + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L1:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L2:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + "adr r1, .Ljump_table\n" + "ldr r0, [r0]\n" + ".Lbase:\n" + "add pc, r1\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L3:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2\n" + ".Ljump_table:\n" + ".4byte (.L1 - .Lbase - 4)\n" + ".4byte (.L2 - .Lbase - 4)\n" + ".4byte (.L3 - .Lbase - 4)\n"; + DriverStr(expected, "JumpTable"); +} + +// Test for >1K fixup. +TEST_F(AssemblerThumb2Test, JumpTable4K) { + // The jump table. Use three labels. + Label label1, label2, label3; + std::vector<Label*> labels({ &label1, &label2, &label3 }); + + // A few dummy loads on entry, interspersed with 2 labels. + constexpr size_t kLdrR0R0Count = 5; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label1); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label2); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Create the jump table, emit the base load. + arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1); + + // Dummy computation, stand-in for the address. We're only testing the jump table here, not how + // it's being used. + __ ldr(arm::R0, arm::Address(arm::R0)); + + // Emit the jump + __ EmitJumpTableDispatch(jump_table, arm::R1); + + // Some more dummy instructions. + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label3); + constexpr size_t kLdrR0R0Count2 = 600; // Note: even so there's no alignment + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops, + __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop. + } + + static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 1 * KB, "Not enough offset"); + static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 4 * KB, "Too much offset"); + + std::string expected = + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L1:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L2:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + "adr r1, .Ljump_table\n" + "ldr r0, [r0]\n" + ".Lbase:\n" + "add pc, r1\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L3:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + ".align 2\n" + ".Ljump_table:\n" + ".4byte (.L1 - .Lbase - 4)\n" + ".4byte (.L2 - .Lbase - 4)\n" + ".4byte (.L3 - .Lbase - 4)\n"; + DriverStr(expected, "JumpTable4K"); +} + +// Test for >4K fixup. +TEST_F(AssemblerThumb2Test, JumpTable64K) { + // The jump table. Use three labels. + Label label1, label2, label3; + std::vector<Label*> labels({ &label1, &label2, &label3 }); + + // A few dummy loads on entry, interspersed with 2 labels. + constexpr size_t kLdrR0R0Count = 5; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label1); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label2); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Create the jump table, emit the base load. + arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1); + + // Dummy computation, stand-in for the address. We're only testing the jump table here, not how + // it's being used. + __ ldr(arm::R0, arm::Address(arm::R0)); + + // Emit the jump + __ EmitJumpTableDispatch(jump_table, arm::R1); + + // Some more dummy instructions. + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label3); + constexpr size_t kLdrR0R0Count2 = 2601; // Note: odd so there's no alignment + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops, + __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop. + } + + static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 4 * KB, "Not enough offset"); + static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 64 * KB, "Too much offset"); + + std::string expected = + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L1:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L2:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself. + // (Note: have to use constants, as labels aren't accepted. + "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) + + ") * 2 - 4) & 0xFFFF)\n" + "add r1, pc\n" + "ldr r0, [r0]\n" + ".Lbase:\n" + "add pc, r1\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L3:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + ".align 2\n" + ".Ljump_table:\n" + ".4byte (.L1 - .Lbase - 4)\n" + ".4byte (.L2 - .Lbase - 4)\n" + ".4byte (.L3 - .Lbase - 4)\n"; + DriverStr(expected, "JumpTable64K"); +} + +// Test for >64K fixup. +TEST_F(AssemblerThumb2Test, JumpTableFar) { + // The jump table. Use three labels. + Label label1, label2, label3; + std::vector<Label*> labels({ &label1, &label2, &label3 }); + + // A few dummy loads on entry, interspersed with 2 labels. + constexpr size_t kLdrR0R0Count = 5; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label1); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label2); + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + // Create the jump table, emit the base load. + arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1); + + // Dummy computation, stand-in for the address. We're only testing the jump table here, not how + // it's being used. + __ ldr(arm::R0, arm::Address(arm::R0)); + + // Emit the jump + __ EmitJumpTableDispatch(jump_table, arm::R1); + + // Some more dummy instructions. + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ BindTrackedLabel(&label3); + constexpr size_t kLdrR0R0Count2 = 70001; // Note: odd so there's no alignment + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops, + __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop. + } + + static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 64 * KB, "Not enough offset"); + + std::string expected = + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L1:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L2:\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself. + // (Note: have to use constants, as labels aren't accepted. + "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) + + ") * 2 - 4) & 0xFFFF)\n" + "movt r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) + + ") * 2 - 4) >> 16)\n" + ".Lhelp:" + "add r1, pc\n" + "ldr r0, [r0]\n" + ".Lbase:\n" + "add pc, r1\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".L3:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + ".align 2\n" + ".Ljump_table:\n" + ".4byte (.L1 - .Lbase - 4)\n" + ".4byte (.L2 - .Lbase - 4)\n" + ".4byte (.L3 - .Lbase - 4)\n"; + DriverStr(expected, "JumpTableFar"); +} + TEST_F(AssemblerThumb2Test, Clz) { __ clz(arm::R0, arm::R1); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index d97a2a40b2..dfe6babb25 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -227,6 +227,8 @@ class AssemblerBuffer { // Returns the position in the instruction stream. int GetPosition() { return cursor_ - contents_; } + void ExtendCapacity(size_t min_capacity = 0u); + private: // The limit is set to kMinimumGap bytes before the end of the data area. // This leaves enough space for the longest possible instruction and allows @@ -261,8 +263,6 @@ class AssemblerBuffer { return data + capacity - kMinimumGap; } - void ExtendCapacity(size_t min_capacity = 0u); - friend class AssemblerFixup; }; |