diff options
author | 2016-09-10 02:32:44 -0700 | |
---|---|---|
committer | 2016-09-14 00:44:11 -0700 | |
commit | 96b6682d2d65f94c262590ef88bafdc70171ab8c (patch) | |
tree | ebb8a72edbaff6a6c62a6ce8bfb177a57389abaf | |
parent | 9ef68a3ad02eb7e2242a6d7f6a208c7a9b8ac407 (diff) |
MIPS32: Implement table-based packed switch
Test: booted MIPS32R2 in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R2) on CI20
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R6) in QEMU
Test: test-art-host-gtest
Change-Id: I2e1a65ff1ba9406b84351ba7998f853b1ce4aef9
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 112 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips.h | 19 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 3 | ||||
-rw-r--r-- | compiler/optimizing/nodes_mips.h | 35 | ||||
-rw-r--r-- | compiler/optimizing/pc_relative_fixups_mips.cc | 19 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 238 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips.h | 65 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips32r6_test.cc | 43 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips_test.cc | 38 |
9 files changed, 512 insertions, 60 deletions
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index f07f8a0d91..306538c188 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -5824,13 +5824,11 @@ void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - +void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { // Create a set of compare/jumps. Register temp_reg = TMP; __ Addiu32(temp_reg, value_reg, -lower_bound); @@ -5839,7 +5837,7 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr // this case, index >= num_entries must be true. So that we can save one branch instruction. __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); // Jump to successors[0] if value == lower_bound. __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); int32_t last_index = 0; @@ -5857,11 +5855,107 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { __ B(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorMIPS::GenTableBasedPackedSwitch(Register value_reg, + Register constant_area, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Create a jump table. + std::vector<MipsLabel*> labels(num_entries); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Is the value in range? + __ Addiu32(TMP, value_reg, -lower_bound); + if (IsInt<16>(static_cast<int32_t>(num_entries))) { + __ Sltiu(AT, TMP, num_entries); + __ Beqz(AT, codegen_->GetLabelOf(default_block)); + } else { + __ LoadConst32(AT, num_entries); + __ Bgeu(TMP, AT, codegen_->GetLabelOf(default_block)); + } + + // We are in the range of the table. + // Load the target address from the jump table, indexing by the value. + __ LoadLabelAddress(AT, constant_area, table->GetLabel()); + __ Sll(TMP, TMP, 2); + __ Addu(TMP, TMP, AT); + __ Lw(TMP, TMP, 0); + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). + __ Addu(TMP, TMP, AT); + // And jump. + __ Jr(TMP); + __ NopIfNoReordering(); +} + +void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + if (codegen_->GetInstructionSetFeatures().IsR6() && + num_entries > kPackedSwitchJumpTableThreshold) { + // R6 uses PC-relative addressing to access the jump table. + // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access + // the jump table and it is implemented by changing HPackedSwitch to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress. + // See VisitMipsPackedSwitch() for the table-based implementation on R2. + GenTableBasedPackedSwitch(value_reg, + ZERO, + lower_bound, + num_entries, + switch_block, + default_block); + } else { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } +} + +void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + // Constant area pointer (HMipsComputeBaseMethodAddress). + locations->SetInAt(1, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + Register constant_area = locations->InAt(1).AsRegister<Register>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // This is an R2-only path. HPackedSwitch has been changed to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress + // required to address the jump table relative to PC. + GenTableBasedPackedSwitch(value_reg, + constant_area, + lower_bound, + num_entries, + switch_block, + default_block); +} + void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( HMipsComputeBaseMethodAddress* insn) { LocationSummary* locations = diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 003998129e..956a466f9b 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -218,6 +218,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { MipsAssembler* GetAssembler() const { return assembler_; } + // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit + // instructions for N cases. + // Table-based packed switch generates approx. 11 32-bit instructions + // and N 32-bit data words for N cases. + // At N = 6 they come out as 18 and 17 32-bit words respectively. + // We switch to the table-based method starting with 7 cases. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -262,6 +270,17 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); auto GetImplicitNullChecker(HInstruction* instruction); + void GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); + void GenTableBasedPackedSwitch(Register value_reg, + Register constant_area, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); MipsAssembler* const assembler_; CodeGeneratorMIPS* const codegen_; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 149a71d1b9..97b7916b1a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1311,7 +1311,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ M(MipsComputeBaseMethodAddress, Instruction) \ - M(MipsDexCacheArraysBase, Instruction) + M(MipsDexCacheArraysBase, Instruction) \ + M(MipsPackedSwitch, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index de77245e17..36431c1fb9 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -66,6 +66,41 @@ class HMipsDexCacheArraysBase : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); }; +// Mips version of HPackedSwitch that holds a pointer to the base method address. +class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { + public: + HMipsPackedSwitch(int32_t start_value, + int32_t num_entries, + HInstruction* input, + HMipsComputeBaseMethodAddress* method_base, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None(), dex_pc), + start_value_(start_value), + num_entries_(num_entries) { + SetRawInputAt(0, input); + SetRawInputAt(1, method_base); + } + + bool IsControlFlow() const OVERRIDE { return true; } + + int32_t GetStartValue() const { return start_value_; } + + int32_t GetNumEntries() const { return num_entries_; } + + HBasicBlock* GetDefaultBlock() const { + // Last entry is the default block. + return GetBlock()->GetSuccessors()[num_entries_]; + } + + DECLARE_INSTRUCTION(MipsPackedSwitch); + + private: + const int32_t start_value_; + const int32_t num_entries_; + + DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index c6d297df4f..6006e6cf5d 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -92,6 +92,25 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } + void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + if (switch_insn->GetNumEntries() <= + InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { + return; + } + // We need to replace the HPackedSwitch with a HMipsPackedSwitch in order to + // address the constant area. + InitializePCRelativeBasePointer(); + HGraph* graph = GetGraph(); + HBasicBlock* block = switch_insn->GetBlock(); + HMipsPackedSwitch* mips_switch = new (graph->GetArena()) HMipsPackedSwitch( + switch_insn->GetStartValue(), + switch_insn->GetNumEntries(), + switch_insn->InputAt(0), + base_, + switch_insn->GetDexPc()); + block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch); + } + void HandleInvoke(HInvoke* invoke) { // If this is an invoke-static/-direct with PC-relative dex cache array // addressing, we need the PC-relative address base. diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 4b580b620f..b972c70eb9 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -230,12 +230,14 @@ void MipsAssembler::FinalizeCode() { DsFsmCommitLabel(); SetReorder(false); EmitLiterals(); + ReserveJumpTableSpace(); PromoteBranches(); } void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) { size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs(); EmitBranches(); + EmitJumpTables(); Assembler::FinalizeInstructions(region); PatchCFI(number_of_delayed_adjust_pcs); } @@ -1724,47 +1726,68 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; } -void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) { - CHECK_EQ(is_call && is_literal, false); +void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) { OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 - if (is_literal) { - CHECK(!IsResolved()); - type_ = kR6Literal; - } else if (is_call) { - InitShortOrLong(offset_size, kR6Call, kR6LongCall); - } else { - switch (condition_) { - case kUncond: - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); - break; - case kCondEQZ: - case kCondNEZ: - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; - break; - default: - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); - break; - } + switch (initial_type) { + case kLabel: + CHECK(!IsResolved()); + type_ = kR6Label; + break; + case kLiteral: + CHECK(!IsResolved()); + type_ = kR6Literal; + break; + case kCall: + InitShortOrLong(offset_size, kR6Call, kR6LongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + break; + default: + InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } } else { // R2 - if (is_literal) { - CHECK(!IsResolved()); - type_ = kLiteral; - } else if (is_call) { - InitShortOrLong(offset_size, kCall, kLongCall); - } else { - switch (condition_) { - case kUncond: - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); - break; - default: - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); - break; - } + switch (initial_type) { + case kLabel: + CHECK(!IsResolved()); + type_ = kLabel; + break; + case kLiteral: + CHECK(!IsResolved()); + type_ = kLiteral; + break; + case kCall: + InitShortOrLong(offset_size, kCall, kLongCall); + break; + case kCondBranch: + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } + break; + default: + LOG(FATAL) << "Unexpected branch type " << initial_type; + UNREACHABLE(); } } old_type_ = type_; @@ -1804,7 +1827,7 @@ MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bo rhs_reg_(0), condition_(kUncond), delayed_instruction_(kUnfilledDelaySlot) { - InitializeType(is_call, /* is_literal */ false, is_r6); + InitializeType((is_call ? kCall : kCondBranch), is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -1862,10 +1885,14 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(/* is_call */ false, /* is_literal */ false, is_r6); + InitializeType(kCondBranch, is_r6); } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg) +MipsAssembler::Branch::Branch(bool is_r6, + uint32_t location, + Register dest_reg, + Register base_reg, + Type label_or_literal_type) : old_location_(location), location_(location), target_(kUnresolved), @@ -1879,7 +1906,7 @@ MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, } else { CHECK_NE(base_reg, ZERO); } - InitializeType(/* is_call */ false, /* is_literal */ true, is_r6); + InitializeType(label_or_literal_type, is_r6); } MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( @@ -2007,12 +2034,16 @@ bool MipsAssembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // R2 near label. + case kLabel: // R2 near literal. case kLiteral: // R6 short branches. case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 near label. + case kR6Label: // R6 near literal. case kR6Literal: return false; @@ -2020,12 +2051,16 @@ bool MipsAssembler::Branch::IsLong() const { case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // R2 far label. + case kFarLabel: // R2 far literal. case kFarLiteral: // R6 long branches. case kR6LongUncondBranch: case kR6LongCondBranch: case kR6LongCall: + // R6 far label. + case kR6FarLabel: // R6 far literal. case kR6FarLiteral: return true; @@ -2096,6 +2131,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // R2 near label. + case kLabel: + type_ = kFarLabel; + break; // R2 near literal. case kLiteral: type_ = kFarLiteral; @@ -2110,6 +2149,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kR6Call: type_ = kR6LongCall; break; + // R6 near label. + case kR6Label: + type_ = kR6FarLabel; + break; // R6 near literal. case kR6Literal: type_ = kR6FarLiteral; @@ -2123,6 +2166,8 @@ void MipsAssembler::Branch::PromoteToLong() { uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const { switch (branch->GetType()) { + case Branch::kLabel: + case Branch::kFarLabel: case Branch::kLiteral: case Branch::kFarLiteral: return GetLabelLocation(&pc_rel_base_label_); @@ -2132,7 +2177,7 @@ uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch } uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { - // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or // `this->GetLocation()` for everything else. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { @@ -2170,6 +2215,8 @@ uint32_t MipsAssembler::Branch::GetOffsetLocation() const { uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { switch (branch->GetType()) { + case Branch::kLabel: + case Branch::kFarLabel: case Branch::kLiteral: case Branch::kFarLiteral: return GetLabelLocation(&pc_rel_base_label_); @@ -2180,7 +2227,7 @@ uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Bra } uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { - // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` // for everything else. CHECK(IsResolved()); @@ -2457,6 +2504,13 @@ void MipsAssembler::Call(MipsLabel* label) { FinalizeLabeledBranch(label); } +void MipsAssembler::LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label) { + // Label address loads are treated as pseudo branches since they require very similar handling. + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLabel); + FinalizeLabeledBranch(label); +} + Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) { DCHECK(size == 4u || size == 8u) << size; literals_.emplace_back(size, data); @@ -2468,13 +2522,17 @@ void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* l DCHECK_EQ(literal->GetSize(), 4u); MipsLabel* label = literal->GetLabel(); DCHECK(!label->IsBound()); - branches_.emplace_back(IsR6(), - buffer_.Size(), - dest_reg, - base_reg); + branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLiteral); FinalizeLabeledBranch(label); } +JumpTable* MipsAssembler::CreateJumpTable(std::vector<MipsLabel*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + void MipsAssembler::EmitLiterals() { if (!literals_.empty()) { // We don't support byte and half-word literals. @@ -2491,6 +2549,60 @@ void MipsAssembler::EmitLiterals() { } } +void MipsAssembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + MipsLabel* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with dummy data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; i++) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void MipsAssembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + MipsLabel* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (MipsLabel* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + void MipsAssembler::PromoteBranches() { // Promote short branches to long as necessary. bool changed; @@ -2539,12 +2651,16 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 near label. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLabel // R2 near literal. { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral // R2 long branches. { 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch { 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall + // R2 far label. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLabel // R2 far literal. { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral // R6 short branches. @@ -2552,12 +2668,16 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 near label. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Label // R6 near literal. { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal // R6 long branches. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch { 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + // R6 far label. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLabel // R6 far literal. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; @@ -2614,6 +2734,12 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Emit(delayed_instruction); break; + // R2 near label. + case Branch::kLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiu(lhs, rhs, offset); + break; // R2 near literal. case Branch::kLiteral: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); @@ -2691,6 +2817,14 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Nop(); break; + // R2 far label. + case Branch::kFarLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Ori(AT, AT, Low16Bits(offset)); + Addu(lhs, AT, rhs); + break; // R2 far literal. case Branch::kFarLiteral: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); @@ -2725,6 +2859,12 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Balc(offset); break; + // R6 near label. + case Branch::kR6Label: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + break; // R6 near literal. case Branch::kR6Literal: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); @@ -2759,6 +2899,14 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jialc(AT, Low16Bits(offset)); break; + // R6 far label. + case Branch::kR6FarLabel: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); + offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Addiu(lhs, AT, Low16Bits(offset)); + break; // R6 far literal. case Branch::kR6FarLiteral: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index d50c439418..099620ccb8 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -126,6 +126,36 @@ class Literal { DISALLOW_COPY_AND_ASSIGN(Literal); }; +// Jump table: table of labels emitted after the literals. Similar to literals. +class JumpTable { + public: + explicit JumpTable(std::vector<MipsLabel*>&& labels) + : label_(), labels_(std::move(labels)) { + } + + uint32_t GetSize() const { + return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t); + } + + const std::vector<MipsLabel*>& GetData() const { + return labels_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + std::vector<MipsLabel*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. class MipsExceptionSlowPath { public: @@ -158,6 +188,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi ds_fsm_state_(kExpectingLabel), ds_fsm_target_pc_(0), literals_(arena->Adapter(kArenaAllocAssembler)), + jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), @@ -685,6 +716,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); } + // Load label address using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). To be used with data labels in the literal / + // jump table area only and not with regular code labels. + void LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label); + // Create a new literal with the given data. Literal* NewLiteral(size_t size, const uint8_t* data); @@ -692,6 +728,12 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // (for R6 only; base_reg must be ZERO). void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(std::vector<MipsLabel*>&& labels); + // // Overridden common assembler high-level functionality. // @@ -935,24 +977,32 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi kUncondBranch, kCondBranch, kCall, + // R2 near label. + kLabel, // R2 near literal. kLiteral, // R2 long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // R2 far label. + kFarLabel, // R2 far literal. kFarLiteral, // R6 short branches. kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 near label. + kR6Label, // R6 near literal. kR6Literal, // R6 long branches. kR6LongUncondBranch, kR6LongCondBranch, kR6LongCall, + // R6 far label. + kR6FarLabel, // R6 far literal. kR6FarLiteral, }; @@ -1009,8 +1059,12 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi BranchCondition condition, Register lhs_reg, Register rhs_reg); - // Literal. - Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg); + // Label address (in literal area) or literal. + Branch(bool is_r6, + uint32_t location, + Register dest_reg, + Register base_reg, + Type label_or_literal_type); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -1105,7 +1159,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call, bool is_literal, bool is_r6); + void InitializeType(Type initial_type, bool is_r6); // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -1178,6 +1232,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; void EmitLiterals(); + void ReserveJumpTableSpace(); + void EmitJumpTables(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -1227,6 +1283,9 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // without invalidating pointers and references to existing elements. ArenaDeque<Literal> literals_; + // Jump table list. + ArenaDeque<JumpTable> jump_tables_; + // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC // we get PC using the NAL instruction. This label marks the position within the assembler buffer // that PC (from NAL) points to. diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index fabb0962fb..750a94df02 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -309,6 +309,12 @@ TEST_F(AssemblerMIPS32r6Test, Lwpc) { DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc"); } +TEST_F(AssemblerMIPS32r6Test, Addiupc) { + // The comment from the Lwpc() test applies to this Addiupc() test as well. + const char* code = ".set imm, {imm}\naddiupc ${reg}, (imm - ((imm & 0x40000) << 1)) << 2"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Addiupc, 19, code), "Addiupc"); +} + TEST_F(AssemblerMIPS32r6Test, Bitswap) { DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -635,6 +641,40 @@ TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) { DriverStr(expected, "StoreDToOffset"); } +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "lapc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "addiu $v0, $at, %lo(2f - 1b)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); +} + TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); __ LoadLiteral(mips::V0, mips::ZERO, literal); @@ -811,8 +851,7 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { DriverStr(expected, "LongBeqc"); } -// TODO: MipsAssembler::Addiupc -// MipsAssembler::Bc +// TODO: MipsAssembler::Bc // MipsAssembler::Jic // MipsAssembler::Jialc // MipsAssembler::Bltc diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 708bc3d50d..a92455fe60 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2307,6 +2307,44 @@ TEST_F(AssemblerMIPSTest, LoadConst32) { DriverStr(expected, "LoadConst32"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "addiu $v0, $v1, %lo(2f - 1b)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddress"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) { + mips::MipsLabel label; + __ BindPcRelBaseLabel(); + __ LoadLabelAddress(mips::V0, mips::V1, &label); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "ori $at, $at, %lo(2f - 1b)\n" + "addu $v0, $at, $v1\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddress"); +} + TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); __ BindPcRelBaseLabel(); |