summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/code_generator_arm64.cc108
-rw-r--r--compiler/optimizing/code_generator_arm64.h4
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.cc238
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.h6
-rw-r--r--compiler/optimizing/code_generator_mips.cc384
-rw-r--r--compiler/optimizing/code_generator_mips.h3
-rw-r--r--compiler/optimizing/code_generator_mips64.cc176
-rw-r--r--compiler/optimizing/code_generator_mips64.h4
-rw-r--r--compiler/optimizing/code_generator_x86.cc208
-rw-r--r--compiler/optimizing/code_generator_x86.h3
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc171
-rw-r--r--compiler/optimizing/code_generator_x86_64.h4
-rw-r--r--compiler/optimizing/codegen_test.cc2
-rw-r--r--compiler/optimizing/gvn.cc4
-rw-r--r--compiler/optimizing/induction_var_range.cc33
-rw-r--r--compiler/optimizing/instruction_simplifier.cc40
-rw-r--r--compiler/optimizing/intrinsics.cc2
-rw-r--r--compiler/optimizing/intrinsics.h8
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc115
-rw-r--r--compiler/optimizing/intrinsics_arm_vixl.cc258
-rw-r--r--compiler/optimizing/intrinsics_mips.cc604
-rw-r--r--compiler/optimizing/intrinsics_mips.h1
-rw-r--r--compiler/optimizing/intrinsics_mips64.cc305
-rw-r--r--compiler/optimizing/intrinsics_mips64.h2
-rw-r--r--compiler/optimizing/intrinsics_x86.cc277
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc202
-rw-r--r--compiler/optimizing/loop_optimization.cc176
-rw-r--r--compiler/optimizing/nodes.h73
-rw-r--r--compiler/optimizing/optimizing_compiler_stats.h4
-rw-r--r--compiler/optimizing/pc_relative_fixups_x86.cc5
-rw-r--r--compiler/optimizing/scheduler.cc2
31 files changed, 1641 insertions, 1781 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6bf3d86808..bc687e845b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -78,6 +78,7 @@ using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
@@ -5489,6 +5490,113 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+// TODO: integrate with HandleBinaryOp?
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ Register op1_reg;
+ Register op2_reg;
+ Register out_reg;
+ if (type == DataType::Type::kInt64) {
+ op1_reg = XRegisterFrom(op1);
+ op2_reg = XRegisterFrom(op2);
+ out_reg = XRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ op1_reg = WRegisterFrom(op1);
+ op2_reg = WRegisterFrom(op2);
+ out_reg = WRegisterFrom(out);
+ }
+
+ __ Cmp(op1_reg, op2_reg);
+ __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
+}
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ FPRegister op1_reg;
+ FPRegister op2_reg;
+ FPRegister out_reg;
+ if (type == DataType::Type::kFloat64) {
+ op1_reg = DRegisterFrom(op1);
+ op2_reg = DRegisterFrom(op2);
+ out_reg = DRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ op1_reg = SRegisterFrom(op1);
+ op2_reg = SRegisterFrom(op2);
+ out_reg = SRegisterFrom(out);
+ }
+
+ if (is_min) {
+ __ Fmin(out_reg, op1_reg, op2_reg);
+ } else {
+ __ Fmax(out_reg, op1_reg, op2_reg);
+ }
+}
+
+// TODO: integrate with HandleBinaryOp?
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARM64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARM64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d78ad87c51..cb61b69609 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -273,6 +273,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 13d5764280..94438df898 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -4690,6 +4690,244 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ vixl32::Register op1 = RegisterFrom(op1_loc);
+ vixl32::Register op2 = RegisterFrom(op2_loc);
+ vixl32::Register out = RegisterFrom(out_loc);
+
+ __ Cmp(op1, op2);
+
+ {
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+
+ __ ite(is_min ? lt : gt);
+ __ mov(is_min ? lt : gt, out, op1);
+ __ mov(is_min ? ge : le, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
+ vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
+ vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
+ vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
+ vixl32::Register out_lo = LowRegisterFrom(out_loc);
+ vixl32::Register out_hi = HighRegisterFrom(out_loc);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ DCHECK(op1_lo.Is(out_lo));
+ DCHECK(op1_hi.Is(out_hi));
+
+ // Compare op1 >= op2, or op1 < op2.
+ __ Cmp(out_lo, op2_lo);
+ __ Sbcs(temp, out_hi, op2_hi);
+
+ // Now GE/LT condition code is correct for the long comparison.
+ {
+ vixl32::ConditionType cond = is_min ? ge : lt;
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cond);
+ __ mov(cond, out_lo, op2_lo);
+ __ mov(cond, out_hi, op2_hi);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::SRegister op1 = SRegisterFrom(op1_loc);
+ vixl32::SRegister op2 = SRegisterFrom(op2_loc);
+ vixl32::SRegister out = SRegisterFrom(out_loc);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp1 = temps.Acquire();
+ vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
+ vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F32, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
+ __ Vmov(temp1, op1);
+ __ Vmov(temp2, op2);
+ if (is_min) {
+ __ Orr(temp1, temp1, temp2);
+ } else {
+ __ And(temp1, temp1, temp2);
+ }
+ __ Vmov(out, temp1);
+ __ B(final_label);
+
+ // handle NaN input.
+ __ Bind(&nan);
+ __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
+ __ Vmov(out, temp1);
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in.
+ return;
+ }
+
+ vixl32::DRegister op1 = DRegisterFrom(op1_loc);
+ vixl32::DRegister op2 = DRegisterFrom(op2_loc);
+ vixl32::DRegister out = DRegisterFrom(out_loc);
+ vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F64, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0).
+ if (!is_min) {
+ __ Vand(F64, out, op1, op2);
+ __ B(final_label);
+ }
+
+ // handle op1 == op2, min(+0.0,-0.0), NaN input.
+ __ Bind(&handle_nan_eq);
+ __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kInt64:
+ GenerateMinMaxLong(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ GenerateMinMaxFloat(minmax, is_min);
+ break;
+ case DataType::Type::kFloat64:
+ GenerateMinMaxDouble(minmax, is_min);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 8a2ab711f2..054acbc390 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -349,6 +349,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxLong(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFloat(HInstruction* minmax, bool is_min);
+ void GenerateMinMaxDouble(HInstruction* minmax, bool is_min);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 931c9b033e..11c1163119 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -8804,6 +8804,390 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ if (isR6) {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
+ // always change the target (output) register. If the condition is
+ // true the output register gets the contents of the "rs" register;
+ // otherwise, the output register is set to zero. One consequence
+ // of this is that to implement something like "rd = c==0 ? rs : rt"
+ // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions.
+ // After executing this pair of instructions one of the output
+ // registers from the pair will necessarily contain zero. Then the
+ // code ORs the output registers from the SELEQZ/SELNEZ instructions
+ // to get the final result.
+ //
+ // The initial test to see if the output register is same as the
+ // first input register is needed to make sure that value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+ // where the output register is distinct from both the first, and the
+ // second input registers.
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, b_hi, a_hi);
+ __ Bne(b_hi, a_hi, &compare_done);
+
+ __ Sltu(TMP, b_lo, a_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ __ Seleqz(AT, a_lo, TMP);
+ __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
+ // because at this point we're
+ // done using a_lo/b_lo.
+ } else {
+ __ Selnez(AT, a_lo, TMP);
+ __ Seleqz(out_lo, b_lo, TMP); // ditto
+ }
+ __ Or(out_lo, out_lo, AT);
+ if (is_min) {
+ __ Seleqz(AT, a_hi, TMP);
+ __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ } else {
+ __ Selnez(AT, a_hi, TMP);
+ __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ }
+ __ Or(out_hi, out_hi, AT);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, b, a);
+ if (is_min) {
+ __ Seleqz(TMP, a, AT);
+ __ Selnez(AT, b, AT);
+ } else {
+ __ Selnez(TMP, a, AT);
+ __ Seleqz(AT, b, AT);
+ }
+ __ Or(out, TMP, AT);
+ }
+ }
+ } else { // !isR6
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, a_hi, b_hi);
+ __ Bne(a_hi, b_hi, &compare_done);
+
+ __ Sltu(TMP, a_lo, b_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ if (out_lo != a_lo) {
+ __ Movn(out_hi, a_hi, TMP);
+ __ Movn(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movz(out_hi, b_hi, TMP);
+ __ Movz(out_lo, b_lo, TMP);
+ }
+ } else {
+ if (out_lo != a_lo) {
+ __ Movz(out_hi, a_hi, TMP);
+ __ Movz(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movn(out_hi, b_hi, TMP);
+ __ Movn(out_lo, b_lo, TMP);
+ }
+ }
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, a, b);
+ if (is_min) {
+ if (out != a) {
+ __ Movn(out, a, AT);
+ }
+ if (out != b) {
+ __ Movz(out, b, AT);
+ }
+ } else {
+ if (out != a) {
+ __ Movz(out, a, AT);
+ }
+ if (out != b) {
+ __ Movn(out, b, AT);
+ }
+ }
+ }
+ }
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ if (isR6) {
+ MipsLabel noNaNs;
+ MipsLabel done;
+ FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+
+ } else { // !isR6
+ MipsLabel ordered;
+ MipsLabel compare;
+ MipsLabel select;
+ MipsLabel done;
+
+ if (type == DataType::Type::kFloat64) {
+ __ CunD(a, b);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CunS(a, b);
+ }
+ __ Bc1f(&ordered);
+
+ // a or b (or both) is a NaN. Return one, which is a NaN.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(b, b);
+ } else {
+ __ CeqS(b, b);
+ }
+ __ B(&select);
+
+ __ Bind(&ordered);
+
+ // Neither is a NaN.
+ // a == b? (-0.0 compares equal with +0.0)
+ // If equal, handle zeroes, else compare further.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(a, b);
+ } else {
+ __ CeqS(a, b);
+ }
+ __ Bc1f(&compare);
+
+ // a == b either bit for bit or one is -0.0 and the other is +0.0.
+ if (type == DataType::Type::kFloat64) {
+ __ MoveFromFpuHigh(TMP, a);
+ __ MoveFromFpuHigh(AT, b);
+ } else {
+ __ Mfc1(TMP, a);
+ __ Mfc1(AT, b);
+ }
+
+ if (is_min) {
+ // -0.0 prevails over +0.0.
+ __ Or(TMP, TMP, AT);
+ } else {
+ // +0.0 prevails over -0.0.
+ __ And(TMP, TMP, AT);
+ }
+
+ if (type == DataType::Type::kFloat64) {
+ __ Mfc1(AT, a);
+ __ Mtc1(AT, out);
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ __ Mtc1(TMP, out);
+ }
+ __ B(&done);
+
+ __ Bind(&compare);
+
+ if (type == DataType::Type::kFloat64) {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeD(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeD(b, a); // b <= a
+ }
+ } else {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeS(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeS(b, a); // b <= a
+ }
+ }
+
+ __ Bind(&select);
+
+ if (type == DataType::Type::kFloat64) {
+ __ MovtD(out, a);
+ __ MovfD(out, b);
+ } else {
+ __ MovtS(out, a);
+ __ MovfS(out, b);
+ }
+
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderMIPS::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 3cb3b52807..2be8e2ead9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -246,6 +246,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation*, bool is_min);
void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6);
// Generate a heap reference load using one register `out`:
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78db1a397c..d08a0658ec 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -6690,6 +6690,182 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ if (lhs == rhs) {
+ if (out != lhs) {
+ __ Move(out, lhs);
+ }
+ } else {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
+ // change the target (output) register. If the condition is true the
+ // output register gets the contents of the "rs" register; otherwise,
+ // the output register is set to zero. One consequence of this is
+ // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+ // needs to use a pair of SELEQZ/SELNEZ instructions. After
+ // executing this pair of instructions one of the output registers
+ // from the pair will necessarily contain zero. Then the code ORs the
+ // output registers from the SELEQZ/SELNEZ instructions to get the
+ // final result.
+ //
+ // The initial test to see if the output register is same as the
+ // first input register is needed to make sure that value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+ // where the output register is distinct from both the first, and the
+ // second input registers.
+ if (out == lhs) {
+ __ Slt(AT, rhs, lhs);
+ if (is_min) {
+ __ Seleqz(out, lhs, AT);
+ __ Selnez(AT, rhs, AT);
+ } else {
+ __ Selnez(out, lhs, AT);
+ __ Seleqz(AT, rhs, AT);
+ }
+ } else {
+ __ Slt(AT, lhs, rhs);
+ if (is_min) {
+ __ Seleqz(out, rhs, AT);
+ __ Selnez(AT, lhs, AT);
+ } else {
+ __ Selnez(out, rhs, AT);
+ __ Seleqz(AT, lhs, AT);
+ }
+ }
+ __ Or(out, out, AT);
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+ Mips64Label noNaNs;
+ Mips64Label done;
+ FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9ff3f63384..5d40307cc7 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -242,6 +242,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b9ecfeebb6..528930a503 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
@@ -3802,6 +3805,211 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Register to use to perform a long subtract to set cc.
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ if (type == DataType::Type::kInt64) {
+ // Need to perform a subtract to get the sign right.
+ // op1 is already in the same location as the output.
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
+ Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
+
+ // The comparison is performed by subtracting the second operand from
+ // the first operand and then setting the status flags in the same
+ // manner as the SUB instruction."
+ __ cmpl(output_lo, op2_lo);
+
+ // Now use a temp and the borrow to finish the subtraction of op2_hi.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(temp, output_hi);
+ __ sbbl(temp, op2_hi);
+
+ // Now the condition code is correct.
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
+ __ cmovl(cond, output_lo, op2_lo);
+ __ cmovl(cond, output_hi, op2_hi);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register out = locations->Out().AsRegister<Register>();
+ Register op2 = op2_loc.AsRegister<Register>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ __ cmpl(out, op2);
+ Condition cond = is_min ? Condition::kGreater : Condition::kLess;
+ __ cmovl(cond, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(out, kDoubleNaN);
+ } else {
+ Register constant = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(constant, Immediate(kFloatNaN));
+ __ movd(out, constant);
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderX86::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e21ccb5fe3..2dc34e859e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -225,6 +225,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateShlLong(const Location& loc, int shift);
void GenerateShrLong(const Location& loc, int shift);
void GenerateUShrLong(const Location& loc, int shift);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 728e375ad8..d5997245ef 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3842,6 +3842,177 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+ // the second input to be the output (we can simply swap inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ if (type == DataType::Type::kInt64) {
+ __ cmpq(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ cmpl(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86_64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86_64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 0637b274f6..5c8ed6cd8c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -222,6 +222,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index ba4040acad..a0fd5ffcb1 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@
#include <memory>
#include "base/macros.h"
+#include "base/utils.h"
#include "builder.h"
#include "codegen_test_utils.h"
#include "dex/dex_file.h"
@@ -26,7 +27,6 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "register_allocator_linear_scan.h"
-#include "utils.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/mips/managed_register_mips.h"
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 71c394ec1f..f05159b735 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -17,11 +17,11 @@
#include "gvn.h"
#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-#include "base/bit_vector-inl.h"
+#include "base/utils.h"
#include "side_effects_analysis.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index d699d01ecb..0a310ca940 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -78,16 +78,10 @@ static bool IsGEZero(HInstruction* instruction) {
DCHECK(instruction != nullptr);
if (instruction->IsArrayLength()) {
return true;
- } else if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // Instruction MIN(>=0, >=0) is >= 0.
- return IsGEZero(instruction->InputAt(0)) &&
- IsGEZero(instruction->InputAt(1));
- default:
- break;
- }
+ } else if (instruction->IsMin()) {
+ // Instruction MIN(>=0, >=0) is >= 0.
+ return IsGEZero(instruction->InputAt(0)) &&
+ IsGEZero(instruction->InputAt(1));
} else if (instruction->IsAbs()) {
// Instruction ABS(>=0) is >= 0.
// NOTE: ABS(minint) = minint prevents assuming
@@ -101,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) {
/** Hunts "under the hood" for a suitable instruction at the hint. */
static bool IsMaxAtHint(
HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
- if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // For MIN(x, y), return most suitable x or y as maximum.
- return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
- IsMaxAtHint(instruction->InputAt(1), hint, suitable);
- default:
- break;
- }
+ if (instruction->IsMin()) {
+ // For MIN(x, y), return most suitable x or y as maximum.
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+ IsMaxAtHint(instruction->InputAt(1), hint, suitable);
} else {
*suitable = instruction;
return HuntForDeclaration(instruction) == hint;
}
- return false;
}
/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
@@ -364,11 +351,11 @@ void InductionVarRange::Replace(HInstruction* instruction,
}
}
-bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const {
+bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
HInductionVarAnalysis::InductionInfo *trip =
induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, tc);
+ IsConstant(trip->op_a, kExact, trip_count);
return true;
}
return false;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index eac8d2f593..34837700a2 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -120,6 +120,8 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyReturnThis(HInvoke* invoke);
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ void SimplifyMin(HInvoke* invoke, DataType::Type type);
+ void SimplifyMax(HInvoke* invoke, DataType::Type type);
void SimplifyAbs(HInvoke* invoke, DataType::Type type);
CodeGenerator* codegen_;
@@ -2407,6 +2409,20 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke,
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
}
+void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMin* min = new (GetGraph()->GetAllocator())
+ HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min);
+}
+
+void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMax* max = new (GetGraph()->GetAllocator())
+ HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max);
+}
+
void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) {
DCHECK(invoke->IsInvokeStaticOrDirect());
HAbs* abs = new (GetGraph()->GetAllocator())
@@ -2497,6 +2513,30 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kVarHandleStoreStoreFence:
SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore);
break;
+ case Intrinsics::kMathMinIntInt:
+ SimplifyMin(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMinLongLong:
+ SimplifyMin(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMinFloatFloat:
+ SimplifyMin(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMinDoubleDouble:
+ SimplifyMin(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathMaxIntInt:
+ SimplifyMax(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMaxLongLong:
+ SimplifyMax(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMaxFloatFloat:
+ SimplifyMax(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMaxDoubleDouble:
+ SimplifyMax(instruction, DataType::Type::kFloat64);
+ break;
case Intrinsics::kMathAbsInt:
SimplifyAbs(instruction, DataType::Type::kInt32);
break;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index acb830e524..f8dc316e45 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -18,6 +18,7 @@
#include "art_field-inl.h"
#include "art_method-inl.h"
+#include "base/utils.h"
#include "class_linker.h"
#include "dex/invoke_type.h"
#include "driver/compiler_driver.h"
@@ -26,7 +27,6 @@
#include "nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 24aff226ca..1035cbc2c4 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -266,6 +266,14 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \
<< " should have been converted to HIR"; \
}
#define UNREACHABLE_INTRINSICS(Arch) \
+UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \
UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \
UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \
UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 0b04fff927..81c0b50932 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
static void GenNumberOfLeadingZeros(LocationSummary* locations,
DataType::Type type,
MacroAssembler* masm) {
@@ -529,113 +521,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
- FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
- FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
- if (is_min) {
- __ Fmin(out_reg, op1_reg, op2_reg);
- } else {
- __ Fmax(out_reg, op1_reg, op2_reg);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- bool is_long,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
- Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-
- __ Cmp(op1_reg, op2_reg);
- __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index e351fcc706..e61a0b0809 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -432,264 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo
GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::SRegister op1 = SRegisterFrom(op1_loc);
- vixl32::SRegister op2 = SRegisterFrom(op2_loc);
- vixl32::SRegister out = OutputSRegister(invoke);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp1 = temps.Acquire();
- vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
- vixl32::Label nan, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F32, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
- __ Vmov(temp1, op1);
- __ Vmov(temp2, op2);
- if (is_min) {
- __ Orr(temp1, temp1, temp2);
- } else {
- __ And(temp1, temp1, temp2);
- }
- __ Vmov(out, temp1);
- __ B(final_label);
-
- // handle NaN input.
- __ Bind(&nan);
- __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
- __ Vmov(out, temp1);
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in.
- return;
- }
-
- vixl32::DRegister op1 = DRegisterFrom(op1_loc);
- vixl32::DRegister op2 = DRegisterFrom(op2_loc);
- vixl32::DRegister out = OutputDRegister(invoke);
- vixl32::Label handle_nan_eq, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F64, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0).
- if (!is_min) {
- __ Vand(F64, out, op1, op2);
- __ B(final_label);
- }
-
- // handle op1 == op2, min(+0.0,-0.0), NaN input.
- __ Bind(&handle_nan_eq);
- __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
- vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
- vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
- vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
- vixl32::Register out_lo = LowRegisterFrom(out_loc);
- vixl32::Register out_hi = HighRegisterFrom(out_loc);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp = temps.Acquire();
-
- DCHECK(op1_lo.Is(out_lo));
- DCHECK(op1_hi.Is(out_hi));
-
- // Compare op1 >= op2, or op1 < op2.
- __ Cmp(out_lo, op2_lo);
- __ Sbcs(temp, out_hi, op2_hi);
-
- // Now GE/LT condition code is correct for the long comparison.
- {
- vixl32::ConditionType cond = is_min ? ge : lt;
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ itt(cond);
- __ mov(cond, out_lo, op2_lo);
- __ mov(cond, out_hi, op2_hi);
- }
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
-}
-
-static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- vixl32::Register op1 = InputRegisterAt(invoke, 0);
- vixl32::Register op2 = InputRegisterAt(invoke, 1);
- vixl32::Register out = OutputRegister(invoke);
-
- __ Cmp(op1, op2);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ ite(is_min ? lt : gt);
- __ mov(is_min ? lt : gt, out, op1);
- __ mov(is_min ? ge : le, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ false, GetAssembler());
-}
-
void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 6d6ff7576b..bc1292b2b7 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
}
+inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
DataType::Type type,
bool isR6,
+ bool hasMsa,
MipsAssembler* assembler) {
Register out = locations->Out().AsRegister<Register>();
@@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations,
// instructions compared to a loop-based algorithm which required 47
// instructions.
- if (type == DataType::Type::kInt32) {
- Register in = locations->InAt(0).AsRegister<Register>();
-
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- if (isR6) {
- __ MulR6(out, out, TMP);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
} else {
- __ MulR2(out, out, TMP);
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ __ Mtc1(in_lo, FTMP);
+ __ Mthc1(in_hi, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
}
- __ Srl(out, out, 24);
} else {
- DCHECK_EQ(type, DataType::Type::kInt64);
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
- Register out_hi = locations->GetTemp(1).AsRegister<Register>();
- Register tmp_lo = TMP;
- Register out_lo = out;
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
- __ Srl(tmp_lo, in_lo, 1);
- __ Srl(tmp_hi, in_hi, 1);
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ if (isR6) {
+ __ MulR6(out, out, TMP);
+ } else {
+ __ MulR2(out, out, TMP);
+ }
+ __ Srl(out, out, 24);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+ Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register tmp_lo = TMP;
+ Register out_lo = out;
- __ LoadConst32(AT, 0x55555555);
+ __ Srl(tmp_lo, in_lo, 1);
+ __ Srl(tmp_hi, in_hi, 1);
- __ And(tmp_lo, tmp_lo, AT);
- __ Subu(tmp_lo, in_lo, tmp_lo);
+ __ LoadConst32(AT, 0x55555555);
- __ And(tmp_hi, tmp_hi, AT);
- __ Subu(tmp_hi, in_hi, tmp_hi);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Subu(tmp_lo, in_lo, tmp_lo);
- __ LoadConst32(AT, 0x33333333);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Subu(tmp_hi, in_hi, tmp_hi);
- __ And(out_lo, tmp_lo, AT);
- __ Srl(tmp_lo, tmp_lo, 2);
- __ And(tmp_lo, tmp_lo, AT);
- __ Addu(tmp_lo, out_lo, tmp_lo);
+ __ LoadConst32(AT, 0x33333333);
- __ And(out_hi, tmp_hi, AT);
- __ Srl(tmp_hi, tmp_hi, 2);
- __ And(tmp_hi, tmp_hi, AT);
- __ Addu(tmp_hi, out_hi, tmp_hi);
+ __ And(out_lo, tmp_lo, AT);
+ __ Srl(tmp_lo, tmp_lo, 2);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Addu(tmp_lo, out_lo, tmp_lo);
- // Here we deviate from the original algorithm a bit. We've reached
- // the stage where the bitfields holding the subtotals are large
- // enough to hold the combined subtotals for both the low word, and
- // the high word. This means that we can add the subtotals for the
- // the high, and low words into a single word, and compute the final
- // result for both the high, and low words using fewer instructions.
- __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out_hi, tmp_hi, AT);
+ __ Srl(tmp_hi, tmp_hi, 2);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Addu(tmp_hi, out_hi, tmp_hi);
- __ Addu(TMP, tmp_hi, tmp_lo);
+ // Here we deviate from the original algorithm a bit. We've reached
+ // the stage where the bitfields holding the subtotals are large
+ // enough to hold the combined subtotals for both the low word, and
+ // the high word. This means that we can add the subtotals for the
+ // the high, and low words into a single word, and compute the final
+ // result for both the high, and low words using fewer instructions.
+ __ LoadConst32(AT, 0x0F0F0F0F);
- __ Srl(out, TMP, 4);
- __ And(out, out, AT);
- __ And(TMP, TMP, AT);
- __ Addu(out, out, TMP);
+ __ Addu(TMP, tmp_hi, tmp_lo);
- __ LoadConst32(AT, 0x01010101);
+ __ Srl(out, TMP, 4);
+ __ And(out, out, AT);
+ __ And(TMP, TMP, AT);
+ __ Addu(out, out, TMP);
- if (isR6) {
- __ MulR6(out, out, AT);
- } else {
- __ MulR2(out, out, AT);
- }
+ __ LoadConst32(AT, 0x01010101);
- __ Srl(out, out, 24);
+ if (isR6) {
+ __ MulR6(out, out, AT);
+ } else {
+ __ MulR2(out, out, AT);
+ }
+
+ __ Srl(out, out, 24);
+ }
}
}
@@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(int)
@@ -739,459 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
- FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
-
- if (is_R6) {
- MipsLabel noNaNs;
- MipsLabel done;
- FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
- } else {
- MipsLabel ordered;
- MipsLabel compare;
- MipsLabel select;
- MipsLabel done;
-
- if (type == DataType::Type::kFloat64) {
- __ CunD(a, b);
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CunS(a, b);
- }
- __ Bc1f(&ordered);
-
- // a or b (or both) is a NaN. Return one, which is a NaN.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(b, b);
- } else {
- __ CeqS(b, b);
- }
- __ B(&select);
-
- __ Bind(&ordered);
-
- // Neither is a NaN.
- // a == b? (-0.0 compares equal with +0.0)
- // If equal, handle zeroes, else compare further.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(a, b);
- } else {
- __ CeqS(a, b);
- }
- __ Bc1f(&compare);
-
- // a == b either bit for bit or one is -0.0 and the other is +0.0.
- if (type == DataType::Type::kFloat64) {
- __ MoveFromFpuHigh(TMP, a);
- __ MoveFromFpuHigh(AT, b);
- } else {
- __ Mfc1(TMP, a);
- __ Mfc1(AT, b);
- }
-
- if (is_min) {
- // -0.0 prevails over +0.0.
- __ Or(TMP, TMP, AT);
- } else {
- // +0.0 prevails over -0.0.
- __ And(TMP, TMP, AT);
- }
-
- if (type == DataType::Type::kFloat64) {
- __ Mfc1(AT, a);
- __ Mtc1(AT, out);
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mtc1(TMP, out);
- }
- __ B(&done);
-
- __ Bind(&compare);
-
- if (type == DataType::Type::kFloat64) {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeD(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeD(b, a); // b <= a
- }
- } else {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeS(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeS(b, a); // b <= a
- }
- }
-
- __ Bind(&select);
-
- if (type == DataType::Type::kFloat64) {
- __ MovtD(out, a);
- __ MovfD(out, b);
- } else {
- __ MovtS(out, a);
- __ MovfS(out, b);
- }
-
- __ Bind(&done);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- if (is_R6) {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
- // always change the target (output) register. If the condition is
- // true the output register gets the contents of the "rs" register;
- // otherwise, the output register is set to zero. One consequence
- // of this is that to implement something like "rd = c==0 ? rs : rt"
- // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions.
- // After executing this pair of instructions one of the output
- // registers from the pair will necessarily contain zero. Then the
- // code ORs the output registers from the SELEQZ/SELNEZ instructions
- // to get the final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, b_hi, a_hi);
- __ Bne(b_hi, a_hi, &compare_done);
-
- __ Sltu(TMP, b_lo, a_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- __ Seleqz(AT, a_lo, TMP);
- __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
- // because at this point we're
- // done using a_lo/b_lo.
- } else {
- __ Selnez(AT, a_lo, TMP);
- __ Seleqz(out_lo, b_lo, TMP); // ditto
- }
- __ Or(out_lo, out_lo, AT);
- if (is_min) {
- __ Seleqz(AT, a_hi, TMP);
- __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- } else {
- __ Selnez(AT, a_hi, TMP);
- __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- }
- __ Or(out_hi, out_hi, AT);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, b, a);
- if (is_min) {
- __ Seleqz(TMP, a, AT);
- __ Selnez(AT, b, AT);
- } else {
- __ Selnez(TMP, a, AT);
- __ Seleqz(AT, b, AT);
- }
- __ Or(out, TMP, AT);
- }
- }
- } else {
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, a_hi, b_hi);
- __ Bne(a_hi, b_hi, &compare_done);
-
- __ Sltu(TMP, a_lo, b_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- if (out_lo != a_lo) {
- __ Movn(out_hi, a_hi, TMP);
- __ Movn(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movz(out_hi, b_hi, TMP);
- __ Movz(out_lo, b_lo, TMP);
- }
- } else {
- if (out_lo != a_lo) {
- __ Movz(out_hi, a_hi, TMP);
- __ Movz(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movn(out_hi, b_hi, TMP);
- __ Movn(out_lo, b_lo, TMP);
- }
- }
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, a, b);
- if (is_min) {
- if (out != a) {
- __ Movn(out, a, AT);
- }
- if (out != b) {
- __ Movz(out, b, AT);
- }
- } else {
- if (out != a) {
- __ Movz(out, a, AT);
- }
- if (out != b) {
- __ Movn(out, b, AT);
- }
- }
- }
- }
- }
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 13397f11d4..1c1ba40132 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
bool IsR2OrNewer() const;
bool IsR6() const;
bool Is32BitFPU() const;
+ bool HasMsa() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 5debd26c5a..f429afde2c 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
return codegen_->GetGraph()->GetAllocator();
}
+inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
const DataType::Type type,
+ const bool hasMsa,
Mips64Assembler* assembler) {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
@@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations,
// bits are set but the algorithm here attempts to minimize the total
// number of instructions executed even when a large number of bits
// are set.
-
- if (type == DataType::Type::kInt32) {
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- __ MulR6(out, out, TMP);
- __ Srl(out, out, 24);
- } else if (type == DataType::Type::kInt64) {
- __ Dsrl(TMP, in, 1);
- __ LoadConst64(AT, 0x5555555555555555L);
- __ And(TMP, TMP, AT);
- __ Dsubu(TMP, in, TMP);
- __ LoadConst64(AT, 0x3333333333333333L);
- __ And(out, TMP, AT);
- __ Dsrl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Daddu(TMP, out, TMP);
- __ Dsrl(out, TMP, 4);
- __ Daddu(out, out, TMP);
- __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
- __ And(out, out, AT);
- __ LoadConst64(TMP, 0x0101010101010101L);
- __ Dmul(out, out, TMP);
- __ Dsrl32(out, out, 24);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
+ } else {
+ __ Dmtc1(in, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Dmfc1(out, FTMP);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ __ MulR6(out, out, TMP);
+ __ Srl(out, out, 24);
+ } else {
+ __ Dsrl(TMP, in, 1);
+ __ LoadConst64(AT, 0x5555555555555555L);
+ __ And(TMP, TMP, AT);
+ __ Dsubu(TMP, in, TMP);
+ __ LoadConst64(AT, 0x3333333333333333L);
+ __ And(out, TMP, AT);
+ __ Dsrl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Daddu(TMP, out, TMP);
+ __ Dsrl(out, TMP, 4);
+ __ Daddu(out, out, TMP);
+ __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+ __ And(out, out, AT);
+ __ LoadConst64(TMP, 0x0101010101010101L);
+ __ Dmul(out, out, TMP);
+ __ Dsrl32(out, out, 24);
+ }
}
}
@@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -467,222 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- Mips64Assembler* assembler) {
- FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- Mips64Label noNaNs;
- Mips64Label done;
- FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- Mips64Assembler* assembler) {
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (lhs == rhs) {
- if (out != lhs) {
- __ Move(out, lhs);
- }
- } else {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
- // change the target (output) register. If the condition is true the
- // output register gets the contents of the "rs" register; otherwise,
- // the output register is set to zero. One consequence of this is
- // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
- // needs to use a pair of SELEQZ/SELNEZ instructions. After
- // executing this pair of instructions one of the output registers
- // from the pair will necessarily contain zero. Then the code ORs the
- // output registers from the SELEQZ/SELNEZ instructions to get the
- // final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (out == lhs) {
- __ Slt(AT, rhs, lhs);
- if (is_min) {
- __ Seleqz(out, lhs, AT);
- __ Selnez(AT, rhs, AT);
- } else {
- __ Selnez(out, lhs, AT);
- __ Seleqz(AT, rhs, AT);
- }
- } else {
- __ Slt(AT, lhs, rhs);
- if (is_min) {
- __ Seleqz(out, rhs, AT);
- __ Selnez(AT, lhs, AT);
- } else {
- __ Selnez(out, rhs, AT);
- __ Seleqz(AT, lhs, AT);
- }
- }
- __ Or(out, out, AT);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 6f40d90ddb..748b0b02b2 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
+ bool HasMsa() const;
+
private:
Mips64Assembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 0edc061e97..c4f322bf0c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -40,11 +40,6 @@ namespace art {
namespace x86 {
-static constexpr int kDoubleNaNHigh = 0x7FF80000;
-static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
-static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
-
IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
: allocator_(codegen->GetGraph()->GetAllocator()),
codegen_(codegen) {
@@ -333,278 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-static void GenMinMaxFP(HInvoke* invoke,
- bool is_min,
- bool is_double,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- // Do we have a constant area pointer?
- if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(2).IsRegister());
- Register constant_area = locations->InAt(2).AsRegister<Register>();
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
- } else {
- __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
- }
- } else {
- if (is_double) {
- __ pushl(Immediate(kDoubleNaNHigh));
- __ pushl(Immediate(kDoubleNaNLow));
- __ movsd(out, Address(ESP, 0));
- __ addl(ESP, Immediate(8));
- } else {
- __ pushl(Immediate(kFloatNaN));
- __ movss(out, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
- }
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- locations->SetInAt(2, Location::RequiresRegister());
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- if (is_long) {
- // Need to perform a subtract to get the sign right.
- // op1 is already in the same location as the output.
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
-
- Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
- Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
-
- // Spare register to compute the subtraction to set condition code.
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
- // Subtract off op2_low.
- __ movl(temp, output_lo);
- __ subl(temp, op2_lo);
-
- // Now use the same tempo and the borrow to finish the subtraction of op2_hi.
- __ movl(temp, output_hi);
- __ sbbl(temp, op2_hi);
-
- // Now the condition code is correct.
- Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
- __ cmovl(cond, output_lo, op2_lo);
- __ cmovl(cond, output_hi, op2_hi);
- } else {
- Register out = locations->Out().AsRegister<Register>();
- Register op2 = op2_loc.AsRegister<Register>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- __ cmpl(out, op2);
- Condition cond = is_min ? Condition::kGreater : Condition::kLess;
- __ cmovl(cond, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- // Register to use to perform a long subtract to set cc.
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9d378d6b59..437bc3dd3c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -236,208 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
- } else {
- __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86_64Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- if (is_long) {
- __ cmpq(out, op2);
- } else {
- __ cmpl(out, op2);
- }
-
- __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 5a483e234b..d3b081e005 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -334,29 +334,14 @@ static bool IsAddConst(HInstruction* instruction,
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
-// x = max(x_phi, ..)
// x = min(x_phi, ..)
+// x = max(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
- if (reduction->IsAdd()) {
+ if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
(reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
- } else if (reduction->IsInvokeStaticOrDirect()) {
- switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
- (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
- default:
- return false;
- }
}
return false;
}
@@ -1322,50 +1307,34 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return true;
}
- } else if (instruction->IsInvokeStaticOrDirect()) {
- // Accept particular intrinsics.
- HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- HInstruction* r = opa;
- HInstruction* s = opb;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoMinMax)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
- return false; // reject, unless all operands are same-extension narrower
- }
- // Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- s = opb;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions) &&
- VectorizeUse(node, s, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(
- instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
- }
- return true;
- }
- return false;
+ } else if (instruction->IsMin() || instruction->IsMax()) {
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ HInstruction* s = opb;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+ return false; // reject, unless all operands are same-extension narrower
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ s = opb;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(
+ instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
}
- default:
- return false;
- } // switch
+ return true;
+ }
}
return false;
}
@@ -1806,80 +1775,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HUShr(org_type, opa, opb, dex_pc));
+ case HInstruction::kMin:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMin(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMin(org_type, opa, opb, dex_pc));
+ case HInstruction::kMax:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMax(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMax(org_type, opa, opb, dex_pc));
case HInstruction::kAbs:
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HAbs(org_type, opa, dex_pc));
- case HInstruction::kInvokeStaticOrDirect: {
- HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
- if (vector_mode_ == kVector) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble: {
- vector = new (global_allocator_)
- HVecMin(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- vector = new (global_allocator_)
- HVecMax(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
- UNREACHABLE();
- } // switch invoke
- } else {
- // In scalar code, simply clone the method invoke, and replace its operands with the
- // corresponding new scalar instructions in the loop. The instruction will get an
- // environment while being inserted from the instruction map in original program order.
- DCHECK(vector_mode_ == kSequential);
- size_t num_args = invoke->GetNumberOfArguments();
- HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
- global_allocator_,
- num_args,
- invoke->GetType(),
- invoke->GetDexPc(),
- invoke->GetDexMethodIndex(),
- invoke->GetResolvedMethod(),
- invoke->GetDispatchInfo(),
- invoke->GetInvokeType(),
- invoke->GetTargetMethod(),
- invoke->GetClinitCheckRequirement());
- HInputsRef inputs = invoke->GetInputs();
- size_t num_inputs = inputs.size();
- DCHECK_LE(num_args, num_inputs);
- DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree
- for (size_t index = 0; index < num_inputs; ++index) {
- HInstruction* new_input = index < num_args
- ? vector_map_->Get(inputs[index])
- : inputs[index]; // beyond arguments: just pass through
- new_invoke->SetArgumentAt(index, new_input);
- }
- new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
- kNeedsEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- vector = new_invoke;
- }
- break;
- }
default:
break;
} // switch
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7f2d43fbd5..d42f4a7e80 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,6 +26,7 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/iteration_range.h"
+#include "base/quasi_atomic.h"
#include "base/stl_util.h"
#include "base/transform_array_ref.h"
#include "data_type.h"
@@ -1383,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(LoadException, Instruction) \
M(LoadString, Instruction) \
M(LongConstant, Constant) \
+ M(Max, Instruction) \
M(MemoryBarrier, Instruction) \
+ M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
M(NativeDebugInfo, Instruction) \
@@ -5021,6 +5024,76 @@ class HRem FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Rem);
};
+class HMin FINAL : public HBinaryOperation {
+ public:
+ HMin(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x <= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Min);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Min);
+};
+
+class HMax FINAL : public HBinaryOperation {
+ public:
+ HMax(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x >= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Max);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Max);
+};
+
class HAbs FINAL : public HUnaryOperation {
public:
HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265e50..00194ff1fe 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -22,9 +22,9 @@
#include <string>
#include <type_traits>
-#include "atomic.h"
+#include "base/atomic.h"
+#include "base/globals.h"
#include "base/logging.h" // For VLOG_IS_ON.
-#include "globals.h"
namespace art {
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a3ca6311c2..2591783cb6 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -234,12 +234,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (invoke->GetIntrinsic()) {
case Intrinsics::kMathAbsDouble:
case Intrinsics::kMathAbsFloat:
- LOG(FATAL) << "Unreachable abs";
- UNREACHABLE();
case Intrinsics::kMathMaxDoubleDouble:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMinDoubleDouble:
case Intrinsics::kMathMinFloatFloat:
+ LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered "
+ "to IR nodes by instruction simplifier";
+ UNREACHABLE();
case Intrinsics::kMathRoundFloat:
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index cdbe48342d..bca538fb17 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -679,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsCompare() ||
instruction->IsCondition() ||
instruction->IsDiv() ||
+ instruction->IsMin() ||
+ instruction->IsMax() ||
instruction->IsMul() ||
instruction->IsOr() ||
instruction->IsRem() ||