diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 33 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 5 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 5 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 27 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 66 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips.h | 30 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips_test.cc | 80 |
7 files changed, 208 insertions, 38 deletions
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index e58f43e1bb..56df6b5289 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -7677,7 +7677,9 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { break; } if (has_irreducible_loops) { - codegen_->ClobberRA(); + if (load_kind != HLoadClass::LoadKind::kBootImageAddress) { + codegen_->ClobberRA(); + } break; } FALLTHROUGH_INTENDED; @@ -7894,7 +7896,9 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { break; } if (has_irreducible_loops) { - codegen_->ClobberRA(); + if (load_kind != HLoadString::LoadKind::kBootImageAddress) { + codegen_->ClobberRA(); + } break; } FALLTHROUGH_INTENDED; @@ -9026,6 +9030,15 @@ void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); + if (!codegen_->GetInstructionSetFeatures().IsR6()) { + uint32_t num_entries = switch_instr->GetNumEntries(); + if (num_entries > InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { + // When there's no HMipsComputeBaseMethodAddress input, R2 uses the NAL + // instruction to simulate PC-relative addressing when accessing the jump table. + // NAL clobbers RA. Make sure RA is preserved. + codegen_->ClobberRA(); + } + } } void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg, @@ -9109,13 +9122,17 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr HBasicBlock* switch_block = switch_instr->GetBlock(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - if (codegen_->GetInstructionSetFeatures().IsR6() && - num_entries > kPackedSwitchJumpTableThreshold) { + if (num_entries > kPackedSwitchJumpTableThreshold) { // R6 uses PC-relative addressing to access the jump table. - // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access - // the jump table and it is implemented by changing HPackedSwitch to - // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress. - // See VisitMipsPackedSwitch() for the table-based implementation on R2. + // + // R2, OTOH, uses an HMipsComputeBaseMethodAddress input (when available) + // to access the jump table and it is implemented by changing HPackedSwitch to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress (see + // VisitMipsPackedSwitch()). + // + // When there's no HMipsComputeBaseMethodAddress input (e.g. in presence of + // irreducible loops), R2 uses the NAL instruction to simulate PC-relative + // addressing. GenTableBasedPackedSwitch(value_reg, ZERO, lower_bound, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 39a07b82d1..828e7ffd1d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -144,7 +144,8 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { // Load the array length into our temporary. - uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); + HArrayLength* length = array_length->AsArrayLength(); + uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<Register>(), len_offset); length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); @@ -154,7 +155,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<Register>(), array_len); - if (mirror::kUseStringCompression) { + if (mirror::kUseStringCompression && length->IsStringLength()) { __ shrl(length_loc.AsRegister<Register>(), Immediate(1)); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index c8032c25df..6de5e9cd4d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -195,7 +195,8 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { // Load the array length into our temporary. - uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); + HArrayLength* length = array_length->AsArrayLength(); + uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); @@ -205,7 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<CpuRegister>(), array_len); - if (mirror::kUseStringCompression) { + if (mirror::kUseStringCompression && length->IsStringLength()) { __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1)); } } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 645915e2f8..69c6b94c6b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1623,17 +1623,28 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r } // Prepare the new initialization. if (vector_mode_ == kVector) { - // Generate a [initial, 0, .., 0] vector. + // Generate a [initial, 0, .., 0] vector for add or + // a [initial, initial, .., initial] vector for min/max. HVecOperation* red_vector = new_red->AsVecOperation(); + HVecReduce::ReductionKind kind = GetReductionKind(red_vector); size_t vector_length = red_vector->GetVectorLength(); DataType::Type type = red_vector->GetPackedType(); - new_init = Insert(vector_preheader_, - new (global_allocator_) HVecSetScalars(global_allocator_, - &new_init, - type, - vector_length, - 1, - kNoDexPc)); + if (kind == HVecReduce::ReductionKind::kSum) { + new_init = Insert(vector_preheader_, + new (global_allocator_) HVecSetScalars(global_allocator_, + &new_init, + type, + vector_length, + 1, + kNoDexPc)); + } else { + new_init = Insert(vector_preheader_, + new (global_allocator_) HVecReplicateScalar(global_allocator_, + new_init, + type, + vector_length, + kNoDexPc)); + } } else { new_init = ReduceAndExtractIfNeeded(new_init); } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index b83e3f5471..e85645b446 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -3353,8 +3353,6 @@ MipsAssembler::Branch::Branch(bool is_r6, CHECK_NE(dest_reg, ZERO); if (is_r6) { CHECK_EQ(base_reg, ZERO); - } else { - CHECK_NE(base_reg, ZERO); } InitializeType(label_or_literal_type, is_r6); } @@ -3646,15 +3644,29 @@ uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch case Branch::kFarLabel: case Branch::kLiteral: case Branch::kFarLiteral: - return GetLabelLocation(&pc_rel_base_label_); + if (branch->GetRightRegister() != ZERO) { + return GetLabelLocation(&pc_rel_base_label_); + } + // For those label/literal loads which come with their own NAL instruction + // and don't depend on `pc_rel_base_label_` we can simply use the location + // of the "branch" (the NAL precedes the "branch" immediately). The location + // is close enough for the user of the returned location, PromoteIfNeeded(), + // to not miss needed promotion to a far load. + // (GetOffsetSizeNeeded() provides a little leeway by means of kMaxBranchSize, + // which is larger than all composite branches and label/literal loads: it's + // OK to promote a bit earlier than strictly necessary, it makes things + // simpler.) + FALLTHROUGH_INTENDED; default: return branch->GetLocation(); } } uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { - // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or - // `this->GetLocation()` for everything else. + // `location` comes from GetBranchLocationOrPcRelBase() and is either the location + // of the PC-relative branch or (for some R2 label and literal loads) the location + // of `pc_rel_base_label_`. The PC-relative offset of the branch/load is relative + // to this location. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { return 0; @@ -3695,7 +3707,15 @@ uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Bra case Branch::kFarLabel: case Branch::kLiteral: case Branch::kFarLiteral: - return GetLabelLocation(&pc_rel_base_label_); + if (branch->GetRightRegister() == ZERO) { + // These loads don't use `pc_rel_base_label_` and instead rely on their own + // NAL instruction (it immediately precedes the "branch"). Therefore the + // effective PC-relative base register is RA and it corresponds to the 2nd + // instruction after the NAL. + return branch->GetLocation() + sizeof(uint32_t); + } else { + return GetLabelLocation(&pc_rel_base_label_); + } default: return branch->GetOffsetLocation() + Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t); @@ -3703,9 +3723,10 @@ uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Bra } uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { - // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or - // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` - // for everything else. + // `location` comes from GetBranchOrPcRelBaseForEncoding() and is either a location + // within/near the PC-relative branch or (for some R2 label and literal loads) the + // location of `pc_rel_base_label_`. The PC-relative offset of the branch/load is + // relative to this location. CHECK(IsResolved()); uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for @@ -4001,6 +4022,12 @@ void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) { void MipsAssembler::LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label) { // Label address loads are treated as pseudo branches since they require very similar handling. DCHECK(!label->IsBound()); + // If `pc_rel_base_label_` isn't bound or none of registers contains its address, we + // may generate an individual NAL instruction to simulate PC-relative addressing on R2 + // by specifying `base_reg` of `ZERO`. Check for it. + if (base_reg == ZERO && !IsR6()) { + Nal(); + } branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLabel); FinalizeLabeledBranch(label); } @@ -4016,6 +4043,12 @@ void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* l DCHECK_EQ(literal->GetSize(), 4u); MipsLabel* label = literal->GetLabel(); DCHECK(!label->IsBound()); + // If `pc_rel_base_label_` isn't bound or none of registers contains its address, we + // may generate an individual NAL instruction to simulate PC-relative addressing on R2 + // by specifying `base_reg` of `ZERO`. Check for it. + if (base_reg == ZERO && !IsR6()) { + Nal(); + } branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLiteral); FinalizeLabeledBranch(label); } @@ -4203,6 +4236,13 @@ static inline bool IsAbsorbableInstruction(uint32_t instruction) { } } +static inline Register GetR2PcRelBaseRegister(Register reg) { + // LoadLabelAddress() and LoadLiteral() generate individual NAL + // instructions on R2 when the specified base register is ZERO + // and so the effective PC-relative base register is RA, not ZERO. + return (reg == ZERO) ? RA : reg; +} + // Note: make sure branch_info_[] and EmitBranch() are kept synchronized. void MipsAssembler::EmitBranch(uint32_t branch_id) { CHECK_EQ(overwriting_, true); @@ -4293,13 +4333,13 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { case Branch::kLabel: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiu(lhs, rhs, offset); + Addiu(lhs, GetR2PcRelBaseRegister(rhs), offset); break; // R2 near literal. case Branch::kLiteral: DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Lw(lhs, rhs, offset); + Lw(lhs, GetR2PcRelBaseRegister(rhs), offset); break; // R2 long branches. @@ -4378,7 +4418,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); Ori(AT, AT, Low16Bits(offset)); - Addu(lhs, AT, rhs); + Addu(lhs, AT, GetR2PcRelBaseRegister(rhs)); break; // R2 far literal. case Branch::kFarLiteral: @@ -4386,7 +4426,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { offset += (offset & 0x8000) << 1; // Account for sign extension in lw. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); - Addu(AT, AT, rhs); + Addu(AT, AT, GetR2PcRelBaseRegister(rhs)); Lw(lhs, AT, Low16Bits(offset)); break; diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 57b3edd03a..1c5b442557 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -1061,16 +1061,36 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); } - // Load label address using the base register (for R2 only) or using PC-relative loads - // (for R6 only; base_reg must be ZERO). To be used with data labels in the literal / - // jump table area only and not with regular code labels. + // Load label address using PC-relative addressing. + // To be used with data labels in the literal / jump table area only and not + // with regular code labels. + // + // For R6 base_reg must be ZERO. + // + // On R2 there are two possible uses w.r.t. base_reg: + // + // - base_reg = ZERO: + // The NAL instruction will be generated as part of the load and it will + // clobber the RA register. + // + // - base_reg != ZERO: + // The RA-clobbering NAL instruction won't be generated as part of the load. + // The label pc_rel_base_label_ must be bound (with BindPcRelBaseLabel()) + // and base_reg must hold the address of the label. Example: + // __ Nal(); + // __ Move(S3, RA); + // __ BindPcRelBaseLabel(); // S3 holds the address of pc_rel_base_label_. + // __ LoadLabelAddress(A0, S3, label1); + // __ LoadLabelAddress(A1, S3, label2); + // __ LoadLiteral(V0, S3, literal1); + // __ LoadLiteral(V1, S3, literal2); void LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label); // Create a new literal with the given data. Literal* NewLiteral(size_t size, const uint8_t* data); - // Load literal using the base register (for R2 only) or using PC-relative loads - // (for R6 only; base_reg must be ZERO). + // Load literal using PC-relative addressing. + // See the above comments for LoadLabelAddress() on the value of base_reg. void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); // Create a jump table for the given labels that will be emitted when finalizing. diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index eed83a5528..9397be4c09 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2913,6 +2913,46 @@ TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) { DriverStr(expected, "LoadNearestFarLabelAddress"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddressUsingNal) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + __ Bind(&label); + + std::string expected = + ".set noreorder\n" + "bltzal $zero, .+4\n" + "addiu $v0, $ra, %lo(2f - 1f)\n" + "1:\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n"; + DriverStr(expected, "LoadFarthestNearLabelAddressUsingNal"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddressUsingNal) { + mips::MipsLabel label; + __ LoadLabelAddress(mips::V0, mips::ZERO, &label); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + + std::string expected = + ".set noreorder\n" + "bltzal $zero, .+4\n" + "lui $at, %hi(2f - 1f)\n" + "1:\n" + "ori $at, $at, %lo(2f - 1b)\n" + "addu $v0, $at, $ra\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n"; + DriverStr(expected, "LoadNearestFarLabelAddressUsingNal"); +} + TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); __ BindPcRelBaseLabel(); @@ -2951,6 +2991,46 @@ TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { DriverStr(expected, "LoadNearestFarLiteral"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteralUsingNal) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + ".set noreorder\n" + "bltzal $zero, .+4\n" + "lw $v0, %lo(2f - 1f)($ra)\n" + "1:\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteralUsingNal"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLiteralUsingNal) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + ".set noreorder\n" + "bltzal $zero, .+4\n" + "lui $at, %hi(2f - 1f)\n" + "1:\n" + "addu $at, $at, $ra\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteralUsingNal"); +} + #undef __ } // namespace art |