diff options
| author | 2017-01-05 17:37:56 +0000 | |
|---|---|---|
| committer | 2017-01-05 17:37:57 +0000 | |
| commit | f67dadb5550ee2bd9db0b7b0b75d8c44ddf170d2 (patch) | |
| tree | 44f61f9bd0a674cae29c42e6dff72d4ff14189d0 | |
| parent | cda4b75615f5f11c101ff846a05affda405b101b (diff) | |
| parent | 0960ac5a5a255bb3e8418e185914243aeef54a7c (diff) | |
Merge "MIPS64: Implement table-based packed switch"
| -rw-r--r-- | compiler/optimizing/code_generator_mips64.cc | 78 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_mips64.h | 18 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.cc | 81 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64.h | 44 | ||||
| -rw-r--r-- | compiler/utils/mips64/assembler_mips64_test.cc | 31 |
5 files changed, 234 insertions, 18 deletions
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5cf3c246cf..36690c0569 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -4517,27 +4517,20 @@ void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - +void InstructionCodeGeneratorMIPS64::GenPackedSwitchWithCompares(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { // Create a set of compare/jumps. GpuRegister temp_reg = TMP; - if (IsInt<16>(-lower_bound)) { - __ Addiu(temp_reg, value_reg, -lower_bound); - } else { - __ LoadConst32(AT, -lower_bound); - __ Addu(temp_reg, value_reg, AT); - } + __ Addiu32(temp_reg, value_reg, -lower_bound); // Jump to default if index is negative // Note: We don't check the case that index is positive while value < lower_bound, because in // this case, index >= num_entries must be true. So that we can save one branch instruction. __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); // Jump to successors[0] if value == lower_bound. __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); int32_t last_index = 0; @@ -4555,11 +4548,66 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { __ Bc(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorMIPS64::GenTableBasedPackedSwitch(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Create a jump table. + std::vector<Mips64Label*> labels(num_entries); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Is the value in range? + __ Addiu32(TMP, value_reg, -lower_bound); + __ LoadConst32(AT, num_entries); + __ Bgeuc(TMP, AT, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load the target address from the jump table, indexing by the value. + __ LoadLabelAddress(AT, table->GetLabel()); + __ Sll(TMP, TMP, 2); + __ Daddu(TMP, TMP, AT); + __ Lw(TMP, TMP, 0); + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). + __ Daddu(TMP, TMP, AT); + // And jump. + __ Jr(TMP); + __ Nop(); +} + +void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + if (num_entries > kPackedSwitchJumpTableThreshold) { + GenTableBasedPackedSwitch(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } else { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } +} + void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) { UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64"; } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index d5811c20e3..8ac919f47e 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -217,6 +217,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { Mips64Assembler* GetAssembler() const { return assembler_; } + // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit + // instructions for N cases. + // Table-based packed switch generates approx. 11 32-bit instructions + // and N 32-bit data words for N cases. + // At N = 6 they come out as 18 and 17 32-bit words respectively. + // We switch to the table-based method starting with 7 cases. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -260,6 +268,16 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { LocationSummary* locations, Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); + void GenTableBasedPackedSwitch(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 5906a71b38..998f2c709b 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -35,12 +35,14 @@ void Mips64Assembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + ReserveJumpTableSpace(); EmitLiterals(); PromoteBranches(); } void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { EmitBranches(); + EmitJumpTables(); Assembler::FinalizeInstructions(region); PatchCFI(); } @@ -482,6 +484,10 @@ void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } +void Mips64Assembler::Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0xf, rs, rt, imm16); +} + void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) { EmitI(1, rs, static_cast<GpuRegister>(6), imm16); } @@ -1081,6 +1087,20 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { TemplateLoadConst64(this, rd, value); } +void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value) { + if (IsInt<16>(value)) { + Addiu(rt, rs, value); + } else { + int16_t high = High16Bits(value); + int16_t low = Low16Bits(value); + high += (low < 0) ? 1 : 0; // Account for sign extension in addiu. + Aui(rt, rs, high); + if (low != 0) { + Addiu(rt, rt, low); + } + } +} + void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -1653,6 +1673,67 @@ void Mips64Assembler::LoadLiteral(GpuRegister dest_reg, FinalizeLabeledBranch(label); } +JumpTable* Mips64Assembler::CreateJumpTable(std::vector<Mips64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +void Mips64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Mips64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with dummy data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; i++) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Mips64Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + Mips64Label* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (Mips64Label* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + void Mips64Assembler::EmitLiterals() { if (!literals_.empty()) { for (Literal& literal : literals_) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 7ef5ab0d39..a0a1db634d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -357,6 +357,36 @@ class Literal { DISALLOW_COPY_AND_ASSIGN(Literal); }; +// Jump table: table of labels emitted after the code and before the literals. Similar to literals. +class JumpTable { + public: + explicit JumpTable(std::vector<Mips64Label*>&& labels) + : label_(), labels_(std::move(labels)) { + } + + size_t GetSize() const { + return labels_.size() * sizeof(uint32_t); + } + + const std::vector<Mips64Label*>& GetData() const { + return labels_; + } + + Mips64Label* GetLabel() { + return &label_; + } + + const Mips64Label* GetLabel() const { + return &label_; + } + + private: + Mips64Label label_; + std::vector<Mips64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. class Mips64ExceptionSlowPath { public: @@ -388,6 +418,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer overwrite_location_(0), literals_(arena->Adapter(kArenaAllocAssembler)), long_literals_(arena->Adapter(kArenaAllocAssembler)), + jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0) { @@ -480,6 +511,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Lwupc(GpuRegister rs, uint32_t imm19); // MIPS64 void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); + void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 void Sync(uint32_t stype); @@ -619,6 +651,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // This function is only used for testing purposes. void RecordLoadConst64Path(int value); + void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 void Bind(Label* label) OVERRIDE { @@ -676,6 +709,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // Load literal using PC-relative loads. void LoadLiteral(GpuRegister dest_reg, LoadOperandType load_type, Literal* literal); + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels); + void Bc(Mips64Label* label); void Balc(Mips64Label* label); void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); @@ -1050,6 +1089,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer const Branch* GetBranch(uint32_t branch_id) const; void EmitLiterals(); + void ReserveJumpTableSpace(); + void EmitJumpTables(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -1073,6 +1114,9 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer ArenaDeque<Literal> literals_; ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons. + // Jump table list. + ArenaDeque<JumpTable> jump_tables_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 564559f92c..f2cbebbfd7 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -1904,9 +1904,9 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) { DriverStr(expected, "StoreFpuToOffset"); } -/////////////////////// -// Loading Constants // -/////////////////////// +////////////////////////////// +// Loading/adding Constants // +////////////////////////////// TEST_F(AssemblerMIPS64Test, LoadConst32) { // IsUint<16>(value) @@ -1949,6 +1949,31 @@ TEST_F(AssemblerMIPS64Test, LoadConst32) { DriverStr(expected, "LoadConst32"); } +TEST_F(AssemblerMIPS64Test, Addiu32) { + __ Addiu32(mips64::A1, mips64::A2, -0x8000); + __ Addiu32(mips64::A1, mips64::A2, +0); + __ Addiu32(mips64::A1, mips64::A2, +0x7FFF); + __ Addiu32(mips64::A1, mips64::A2, -0x8001); + __ Addiu32(mips64::A1, mips64::A2, +0x8000); + __ Addiu32(mips64::A1, mips64::A2, -0x10000); + __ Addiu32(mips64::A1, mips64::A2, +0x10000); + __ Addiu32(mips64::A1, mips64::A2, +0x12345678); + + const char* expected = + "addiu $a1, $a2, -0x8000\n" + "addiu $a1, $a2, 0\n" + "addiu $a1, $a2, 0x7FFF\n" + "aui $a1, $a2, 0xFFFF\n" + "addiu $a1, $a1, 0x7FFF\n" + "aui $a1, $a2, 1\n" + "addiu $a1, $a1, -0x8000\n" + "aui $a1, $a2, 0xFFFF\n" + "aui $a1, $a2, 1\n" + "aui $a1, $a2, 0x1234\n" + "addiu $a1, $a1, 0x5678\n"; + DriverStr(expected, "Addiu32"); +} + static uint64_t SignExtend16To64(uint16_t n) { return static_cast<int16_t>(n); } |