summary | refs | log | tree | commit | diff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/optimizing/code_generator_mips.cc 33
-rw-r--r-- compiler/optimizing/code_generator_x86.cc 5
-rw-r--r-- compiler/optimizing/code_generator_x86_64.cc 5
-rw-r--r-- compiler/optimizing/loop_optimization.cc 27
-rw-r--r-- compiler/utils/mips/assembler_mips.cc 66
-rw-r--r-- compiler/utils/mips/assembler_mips.h 30
-rw-r--r-- compiler/utils/mips/assembler_mips_test.cc 80
7 files changed, 208 insertions, 38 deletions
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e58f43e1bb..56df6b5289 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -7677,7 +7677,9 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
break;
}
if (has_irreducible_loops) {
- codegen_->ClobberRA();
+ if (load_kind != HLoadClass::LoadKind::kBootImageAddress) {
+ codegen_->ClobberRA();
+ }
break;
}
FALLTHROUGH_INTENDED;
@@ -7894,7 +7896,9 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
break;
}
if (has_irreducible_loops) {
- codegen_->ClobberRA();
+ if (load_kind != HLoadString::LoadKind::kBootImageAddress) {
+ codegen_->ClobberRA();
+ }
break;
}
FALLTHROUGH_INTENDED;
@@ -9026,6 +9030,15 @@ void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
+ if (!codegen_->GetInstructionSetFeatures().IsR6()) {
+ uint32_t num_entries = switch_instr->GetNumEntries();
+ if (num_entries > InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) {
+ // When there's no HMipsComputeBaseMethodAddress input, R2 uses the NAL
+ // instruction to simulate PC-relative addressing when accessing the jump table.
+ // NAL clobbers RA. Make sure RA is preserved.
+ codegen_->ClobberRA();
+ }
+ }
}
void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg,
@@ -9109,13 +9122,17 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
HBasicBlock* switch_block = switch_instr->GetBlock();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- if (codegen_->GetInstructionSetFeatures().IsR6() &&
- num_entries > kPackedSwitchJumpTableThreshold) {
+ if (num_entries > kPackedSwitchJumpTableThreshold) {
// R6 uses PC-relative addressing to access the jump table.
- // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access
- // the jump table and it is implemented by changing HPackedSwitch to
- // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress.
- // See VisitMipsPackedSwitch() for the table-based implementation on R2.
+ //
+ // R2, OTOH, uses an HMipsComputeBaseMethodAddress input (when available)
+ // to access the jump table and it is implemented by changing HPackedSwitch to
+ // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress (see
+ // VisitMipsPackedSwitch()).
+ //
+ // When there's no HMipsComputeBaseMethodAddress input (e.g. in presence of
+ // irreducible loops), R2 uses the NAL instruction to simulate PC-relative
+ // addressing.
GenTableBasedPackedSwitch(value_reg,
ZERO,
lower_bound,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 39a07b82d1..828e7ffd1d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -144,7 +144,8 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
// Load the array length into our temporary.
- uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ HArrayLength* length = array_length->AsArrayLength();
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<Register>(), len_offset);
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
@@ -154,7 +155,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<Register>(), array_len);
- if (mirror::kUseStringCompression) {
+ if (mirror::kUseStringCompression && length->IsStringLength()) {
__ shrl(length_loc.AsRegister<Register>(), Immediate(1));
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c8032c25df..6de5e9cd4d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -195,7 +195,8 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
InvokeRuntimeCallingConvention calling_convention;
if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
// Load the array length into our temporary.
- uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
+ HArrayLength* length = array_length->AsArrayLength();
+ uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
Location array_loc = array_length->GetLocations()->InAt(0);
Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
@@ -205,7 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
}
__ movl(length_loc.AsRegister<CpuRegister>(), array_len);
- if (mirror::kUseStringCompression) {
+ if (mirror::kUseStringCompression && length->IsStringLength()) {
__ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
}
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 645915e2f8..69c6b94c6b 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1623,17 +1623,28 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r
}
// Prepare the new initialization.
if (vector_mode_ == kVector) {
- // Generate a [initial, 0, .., 0] vector.
+ // Generate a [initial, 0, .., 0] vector for add or
+ // a [initial, initial, .., initial] vector for min/max.
HVecOperation* red_vector = new_red->AsVecOperation();
+ HVecReduce::ReductionKind kind = GetReductionKind(red_vector);
size_t vector_length = red_vector->GetVectorLength();
DataType::Type type = red_vector->GetPackedType();
- new_init = Insert(vector_preheader_,
- new (global_allocator_) HVecSetScalars(global_allocator_,
- &new_init,
- type,
- vector_length,
- 1,
- kNoDexPc));
+ if (kind == HVecReduce::ReductionKind::kSum) {
+ new_init = Insert(vector_preheader_,
+ new (global_allocator_) HVecSetScalars(global_allocator_,
+ &new_init,
+ type,
+ vector_length,
+ 1,
+ kNoDexPc));
+ } else {
+ new_init = Insert(vector_preheader_,
+ new (global_allocator_) HVecReplicateScalar(global_allocator_,
+ new_init,
+ type,
+ vector_length,
+ kNoDexPc));
+ }
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b83e3f5471..e85645b446 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3353,8 +3353,6 @@ MipsAssembler::Branch::Branch(bool is_r6,
CHECK_NE(dest_reg, ZERO);
if (is_r6) {
CHECK_EQ(base_reg, ZERO);
- } else {
- CHECK_NE(base_reg, ZERO);
}
InitializeType(label_or_literal_type, is_r6);
}
@@ -3646,15 +3644,29 @@ uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch
case Branch::kFarLabel:
case Branch::kLiteral:
case Branch::kFarLiteral:
- return GetLabelLocation(&pc_rel_base_label_);
+ if (branch->GetRightRegister() != ZERO) {
+ return GetLabelLocation(&pc_rel_base_label_);
+ }
+ // For those label/literal loads which come with their own NAL instruction
+ // and don't depend on `pc_rel_base_label_` we can simply use the location
+ // of the "branch" (the NAL precedes the "branch" immediately). The location
+ // is close enough for the user of the returned location, PromoteIfNeeded(),
+ // to not miss needed promotion to a far load.
+ // (GetOffsetSizeNeeded() provides a little leeway by means of kMaxBranchSize,
+ // which is larger than all composite branches and label/literal loads: it's
+ // OK to promote a bit earlier than strictly necessary, it makes things
+ // simpler.)
+ FALLTHROUGH_INTENDED;
default:
return branch->GetLocation();
}
}
uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
- // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or
- // `this->GetLocation()` for everything else.
+ // `location` comes from GetBranchLocationOrPcRelBase() and is either the location
+ // of the PC-relative branch or (for some R2 label and literal loads) the location
+ // of `pc_rel_base_label_`. The PC-relative offset of the branch/load is relative
+ // to this location.
// If the branch is still unresolved or already long, nothing to do.
if (IsLong() || !IsResolved()) {
return 0;
@@ -3695,7 +3707,15 @@ uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Bra
case Branch::kFarLabel:
case Branch::kLiteral:
case Branch::kFarLiteral:
- return GetLabelLocation(&pc_rel_base_label_);
+ if (branch->GetRightRegister() == ZERO) {
+ // These loads don't use `pc_rel_base_label_` and instead rely on their own
+ // NAL instruction (it immediately precedes the "branch"). Therefore the
+ // effective PC-relative base register is RA and it corresponds to the 2nd
+ // instruction after the NAL.
+ return branch->GetLocation() + sizeof(uint32_t);
+ } else {
+ return GetLabelLocation(&pc_rel_base_label_);
+ }
default:
return branch->GetOffsetLocation() +
Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
@@ -3703,9 +3723,10 @@ uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Bra
}
uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
- // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 labels/literals or
- // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
- // for everything else.
+ // `location` comes from GetBranchOrPcRelBaseForEncoding() and is either a location
+ // within/near the PC-relative branch or (for some R2 label and literal loads) the
+ // location of `pc_rel_base_label_`. The PC-relative offset of the branch/load is
+ // relative to this location.
CHECK(IsResolved());
uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
// Calculate the byte distance between instructions and also account for
@@ -4001,6 +4022,12 @@ void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) {
void MipsAssembler::LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label) {
// Label address loads are treated as pseudo branches since they require very similar handling.
DCHECK(!label->IsBound());  // Only labels that are not bound yet are accepted here.
+ // If `pc_rel_base_label_` isn't bound or none of the registers contains its address, the
+ // caller may request an individual NAL instruction to simulate PC-relative addressing on
+ // R2 by passing a `base_reg` of `ZERO`. Check for it.
+ if (base_reg == ZERO && !IsR6()) {
+ Nal();  // Emitted immediately before the pseudo branch recorded below.
+ }
branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLabel);  // Location is taken after the NAL (if any).
FinalizeLabeledBranch(label);
}
@@ -4016,6 +4043,12 @@ void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* l
DCHECK_EQ(literal->GetSize(), 4u);
MipsLabel* label = literal->GetLabel();
DCHECK(!label->IsBound());
+ // If `pc_rel_base_label_` isn't bound or none of the registers contains its address, the
+ // caller may request an individual NAL instruction to simulate PC-relative addressing on
+ // R2 by passing a `base_reg` of `ZERO`. Check for it.
+ if (base_reg == ZERO && !IsR6()) {
+ Nal();
+ }
branches_.emplace_back(IsR6(), buffer_.Size(), dest_reg, base_reg, Branch::kLiteral);
FinalizeLabeledBranch(label);
}
@@ -4203,6 +4236,13 @@ static inline bool IsAbsorbableInstruction(uint32_t instruction) {
}
}
+static inline Register GetR2PcRelBaseRegister(Register reg) {
+ // LoadLabelAddress() and LoadLiteral() generate individual NAL
+ // instructions on R2 when the specified base register is ZERO,
+ // so in that case the effective PC-relative base register is RA, not ZERO.
+ return (reg == ZERO) ? RA : reg;  // Any non-ZERO base register is returned unchanged.
+}
+
// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
void MipsAssembler::EmitBranch(uint32_t branch_id) {
CHECK_EQ(overwriting_, true);
@@ -4293,13 +4333,13 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) {
case Branch::kLabel:
DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- Addiu(lhs, rhs, offset);
+ Addiu(lhs, GetR2PcRelBaseRegister(rhs), offset);
break;
// R2 near literal.
case Branch::kLiteral:
DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- Lw(lhs, rhs, offset);
+ Lw(lhs, GetR2PcRelBaseRegister(rhs), offset);
break;
// R2 long branches.
@@ -4378,7 +4418,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) {
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lui(AT, High16Bits(offset));
Ori(AT, AT, Low16Bits(offset));
- Addu(lhs, AT, rhs);
+ Addu(lhs, AT, GetR2PcRelBaseRegister(rhs));
break;
// R2 far literal.
case Branch::kFarLiteral:
@@ -4386,7 +4426,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) {
offset += (offset & 0x8000) << 1; // Account for sign extension in lw.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Lui(AT, High16Bits(offset));
- Addu(AT, AT, rhs);
+ Addu(AT, AT, GetR2PcRelBaseRegister(rhs));
Lw(lhs, AT, Low16Bits(offset));
break;
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 57b3edd03a..1c5b442557 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -1061,16 +1061,36 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
}
- // Load label address using the base register (for R2 only) or using PC-relative loads
- // (for R6 only; base_reg must be ZERO). To be used with data labels in the literal /
- // jump table area only and not with regular code labels.
+ // Load label address using PC-relative addressing.
+ // To be used with data labels in the literal / jump table area only and not
+ // with regular code labels.
+ //
+ // For R6 base_reg must be ZERO.
+ //
+ // On R2 there are two possible uses w.r.t. base_reg:
+ //
+ // - base_reg = ZERO:
+ // The NAL instruction will be generated as part of the load and it will
+ // clobber the RA register.
+ //
+ // - base_reg != ZERO:
+ // The RA-clobbering NAL instruction won't be generated as part of the load.
+ // The label pc_rel_base_label_ must be bound (with BindPcRelBaseLabel())
+ // and base_reg must hold the address of the label. Example:
+ // __ Nal();
+ // __ Move(S3, RA);
+ // __ BindPcRelBaseLabel(); // S3 holds the address of pc_rel_base_label_.
+ // __ LoadLabelAddress(A0, S3, label1);
+ // __ LoadLabelAddress(A1, S3, label2);
+ // __ LoadLiteral(V0, S3, literal1);
+ // __ LoadLiteral(V1, S3, literal2);
void LoadLabelAddress(Register dest_reg, Register base_reg, MipsLabel* label);
// Create a new literal with the given data.
Literal* NewLiteral(size_t size, const uint8_t* data);
- // Load literal using the base register (for R2 only) or using PC-relative loads
- // (for R6 only; base_reg must be ZERO).
+ // Load literal using PC-relative addressing.
+ // See the above comments for LoadLabelAddress() on the value of base_reg.
void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
// Create a jump table for the given labels that will be emitted when finalizing.
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index eed83a5528..9397be4c09 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2913,6 +2913,46 @@ TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) {
DriverStr(expected, "LoadNearestFarLabelAddress");
}
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddressUsingNal) {  // Label address load with a ZERO base that is expected to stay a single addiu.
+ mips::MipsLabel label;
+ __ LoadLabelAddress(mips::V0, mips::ZERO, &label);  // ZERO base: the expected output starts with a NAL and uses $ra as the base.
+ constexpr size_t kAddiuCount = 0x1FDE;  // Filler count; presumably the largest that still keeps the near form - TODO confirm.
+ for (size_t i = 0; i != kAddiuCount; ++i) {
+ __ Addiu(mips::A0, mips::A1, 0);  // Filler between the load and the label.
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bltzal $zero, .+4\n"  // The NAL as it appears in the expected assembly.
+ "addiu $v0, $ra, %lo(2f - 1f)\n"  // Near form: one addiu relative to $ra.
+ "1:\n" +  // Second instruction after the NAL; offsets are taken relative to this point.
+ RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+ "2:\n";  // Position of the bound label.
+ DriverStr(expected, "LoadFarthestNearLabelAddressUsingNal");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddressUsingNal) {  // Label address load with a ZERO base that is expected to be promoted to the far form.
+ mips::MipsLabel label;
+ __ LoadLabelAddress(mips::V0, mips::ZERO, &label);  // ZERO base: the expected output starts with a NAL and uses $ra as the base.
+ constexpr size_t kAdduCount = 0x1FDF;  // One more filler instruction than in LoadFarthestNearLabelAddressUsingNal.
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);  // Filler between the load and the label.
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bltzal $zero, .+4\n"  // The NAL as it appears in the expected assembly.
+ "lui $at, %hi(2f - 1f)\n"  // Far form: the offset is built in $at with lui + ori.
+ "1:\n"  // Second instruction after the NAL; offsets are taken relative to this point.
+ "ori $at, $at, %lo(2f - 1b)\n"
+ "addu $v0, $at, $ra\n" +  // Offset plus the PC-relative base held in $ra.
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n";  // Position of the bound label.
+ DriverStr(expected, "LoadNearestFarLabelAddressUsingNal");
+}
+
TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
__ BindPcRelBaseLabel();
@@ -2951,6 +2991,46 @@ TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
DriverStr(expected, "LoadNearestFarLiteral");
}
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteralUsingNal) {  // Literal load with a ZERO base that is expected to stay a single lw.
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);  // ZERO base: the expected output starts with a NAL and uses $ra as the base.
+ constexpr size_t kAddiuCount = 0x1FDE;  // Filler count; presumably the largest that still keeps the near form - TODO confirm.
+ for (size_t i = 0; i != kAddiuCount; ++i) {
+ __ Addiu(mips::A0, mips::A1, 0);  // Filler between the load and the literal.
+ }
+
+ std::string expected =
+ ".set noreorder\n"
+ "bltzal $zero, .+4\n"  // The NAL as it appears in the expected assembly.
+ "lw $v0, %lo(2f - 1f)($ra)\n"  // Near form: one lw relative to $ra.
+ "1:\n" +  // Second instruction after the NAL; offsets are taken relative to this point.
+ RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+ "2:\n"  // The literal is expected right after the code.
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteralUsingNal");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteralUsingNal) {  // Literal load with a ZERO base that is expected to be promoted to the far form.
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);  // ZERO base: the expected output starts with a NAL and uses $ra as the base.
+ constexpr size_t kAdduCount = 0x1FDF;  // One more filler instruction than in LoadFarthestNearLiteralUsingNal.
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);  // Filler between the load and the literal.
+ }
+
+ std::string expected =
+ ".set noreorder\n"
+ "bltzal $zero, .+4\n"  // The NAL as it appears in the expected assembly.
+ "lui $at, %hi(2f - 1f)\n"  // Far form: the high half of the offset goes into $at.
+ "1:\n"  // Second instruction after the NAL; offsets are taken relative to this point.
+ "addu $at, $at, $ra\n"  // Add the PC-relative base held in $ra.
+ "lw $v0, %lo(2f - 1b)($at)\n" +  // The low half of the offset is folded into the lw.
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"  // The literal is expected right after the code.
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteralUsingNal");
+}
+
#undef __
} // namespace art