diff options
35 files changed, 1556 insertions, 1701 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 1f9c5546e2..b0ddd8e8c6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -78,6 +78,7 @@ using helpers::OutputFPRegister; using helpers::OutputRegister; using helpers::QRegisterFrom; using helpers::RegisterFrom; +using helpers::SRegisterFrom; using helpers::StackOperandFrom; using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; @@ -5462,6 +5463,119 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +// TODO: integrate with HandleBinaryOp? +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + Register op1_reg; + Register op2_reg; + Register out_reg; + if (type == DataType::Type::kInt64) { + op1_reg = XRegisterFrom(op1); + op2_reg = XRegisterFrom(op2); + out_reg = XRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + op1_reg = WRegisterFrom(op1); + op2_reg = WRegisterFrom(op2); + out_reg = WRegisterFrom(out); + } + + __ Cmp(op1_reg, op2_reg); + __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); +} + +void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1 = locations->InAt(0); + Location op2 = locations->InAt(1); + Location out = locations->Out(); + + FPRegister op1_reg; + FPRegister op2_reg; + FPRegister out_reg; + if (type == DataType::Type::kFloat64) { + op1_reg = DRegisterFrom(op1); + op2_reg = DRegisterFrom(op2); + out_reg = DRegisterFrom(out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + op1_reg = SRegisterFrom(op1); + op2_reg = SRegisterFrom(op2); + out_reg = SRegisterFrom(out); + } + + if (is_min) { + __ Fmin(out_reg, op1_reg, op2_reg); + } else { + __ Fmax(out_reg, op1_reg, op2_reg); + } +} + +void LocationsBuilderARM64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +// TODO: integrate with HandleBinaryOp? +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderARM64::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index e34f799d15..70f5500016 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -273,6 +273,9 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 13518adf7d..4fef027e6d 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -4690,6 +4690,254 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* min_max, bool is_min) { + LocationSummary* locations = min_max->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(min_max, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. + __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* min_max, bool is_min) { + LocationSummary* locations = min_max->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(min_max, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. + __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + GenerateMinMax(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(min, /*is_min*/ true); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(min, /*is_min*/ true); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + GenerateMinMax(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(max, /*is_min*/ false); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(max, /*is_min*/ false); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index bbc715c59d..726a2f9030 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -349,6 +349,11 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* min_max, bool is_min); + void GenerateMinMaxDouble(HInstruction* min_max, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index eb5f72e953..ae42bbcc70 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -8779,6 +8779,397 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. + } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, isR6, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, isR6, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, isR6, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, isR6, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index d09ab7cce0..ae5fe5be19 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -246,6 +246,8 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMax(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); // Generate a heap reference load using one register `out`: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 9593eec455..8031cca7cb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -6665,6 +6665,188 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index ddeb3eb90c..5d925d5d5a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -242,6 +242,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 51b96be0f8..536909aa1f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -3802,6 +3805,217 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderX86::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 51e5bca00b..82496d12e5 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -225,6 +225,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 0bb56a2b4a..bb1fbc5290 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -3821,6 +3821,183 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + switch (min->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(min->GetLocations(), /*is_min*/ true, min->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMin " << min->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + switch (max->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMax(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(max->GetLocations(), /*is_min*/ false, max->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected type for HMax " << max->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); switch (abs->GetResultType()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1079e94dfc..933afdab26 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -222,6 +222,9 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMax(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index d699d01ecb..0a310ca940 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,16 +78,10 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); } else if (instruction->IsAbs()) { // Instruction ABS(>=0) is >= 0. // NOTE: ABS(minint) = minint prevents assuming @@ -101,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. + return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -364,11 +351,11 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { HInductionVarAnalysis::InductionInfo *trip = induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); + IsConstant(trip->op_a, kExact, trip_count); return true; } return false; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index eac8d2f593..34837700a2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -120,6 +120,8 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; @@ -2407,6 +2409,20 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { DCHECK(invoke->IsInvokeStaticOrDirect()); HAbs* abs = new (GetGraph()->GetAllocator()) @@ -2497,6 +2513,30 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; case Intrinsics::kMathAbsInt: SimplifyAbs(instruction, DataType::Type::kInt32); break; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 24aff226ca..1035cbc2c4 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -266,6 +266,14 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0b04fff927..81c0b50932 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -529,113 +521,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) { GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index e351fcc706..e61a0b0809 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -432,264 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. - __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 6d6ff7576b..d108c43a4c 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -742,458 +742,6 @@ void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - // double java.lang.Math.sqrt(double) void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 5debd26c5a..9987d05fb3 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -470,221 +470,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - // double java.lang.Math.sqrt(double) void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0edc061e97..c4f322bf0c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -333,278 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 9d378d6b59..437bc3dd3c 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -236,208 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 5a483e234b..d3b081e005 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -334,29 +334,14 @@ static bool IsAddConst(HInstruction* instruction, // Detect reductions of the following forms, // x = x_phi + .. // x = x_phi - .. -// x = max(x_phi, ..) // x = min(x_phi, ..) +// x = max(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { - if (reduction->IsAdd()) { + if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); - } else if (reduction->IsInvokeStaticOrDirect()) { - switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || - (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); - default: - return false; - } } return false; } @@ -1322,50 +1307,34 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr); - DCHECK(s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); - } - return true; - } - return false; + } else if (instruction->IsMin() || instruction->IsMax()) { + // Deal with vector restrictions. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower + } + // Accept MIN/MAX(x, y) for vectorizable operands. + DCHECK(r != nullptr); + DCHECK(s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } - default: - return false; - } // switch + return true; + } } return false; } @@ -1806,80 +1775,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); + case HInstruction::kMin: + GENERATE_VEC( + new (global_allocator_) HVecMin(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMin(org_type, opa, opb, dex_pc)); + case HInstruction::kMax: + GENERATE_VEC( + new (global_allocator_) HVecMax(global_allocator_, + opa, + opb, + HVecOperation::ToProperType(type, is_unsigned), + vector_length_, + dex_pc), + new (global_allocator_) HMax(org_type, opa, opb, dex_pc)); case HInstruction::kAbs: DCHECK(opb == nullptr); GENERATE_VEC( new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), new (global_allocator_) HAbs(org_type, opa, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { - vector = new (global_allocator_) - HVecMin(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, - opa, - opb, - HVecOperation::ToProperType(type, is_unsigned), - vector_length_, - dex_pc); - break; - } - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. - DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } default: break; } // switch diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 62550be526..9da46206da 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1384,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(LoadException, Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -5017,6 +5019,76 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; +class HMin FINAL : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax FINAL : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const OVERRIDE { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + class HAbs FINAL : public HUnaryOperation { public: HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index a3ca6311c2..2591783cb6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -234,12 +234,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (invoke->GetIntrinsic()) { case Intrinsics::kMathAbsDouble: case Intrinsics::kMathAbsFloat: - LOG(FATAL) << "Unreachable abs"; - UNREACHABLE(); case Intrinsics::kMathMaxDoubleDouble: case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " + "to IR nodes by instruction simplifier"; + UNREACHABLE(); case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index cdbe48342d..bca538fb17 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -679,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java index bd5d9e20c7..517aacd9f2 100644 --- a/test/445-checker-licm/src/Main.java +++ b/test/445-checker-licm/src/Main.java @@ -153,7 +153,6 @@ public class Main { return result; } - /// CHECK-START: int Main.invariantBoundIntrinsic(int) instruction_simplifier (before) /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}} // @@ -176,14 +175,17 @@ public class Main { return result; } - /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (before) + /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) instruction_simplifier (before) /// CHECK-DAG: InvokeStaticOrDirect loop:{{B\d+}} + /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (before) + /// CHECK-DAG: Max loop:{{B\d+}} + /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after) - /// CHECK-NOT: InvokeStaticOrDirect loop:{{B\d+}} + /// CHECK-NOT: Max loop:{{B\d+}} /// CHECK-START: int Main.invariantBodyIntrinsic(int, int) licm (after) - /// CHECK-DAG: InvokeStaticOrDirect loop:none + /// CHECK-DAG: Max loop:none public static int invariantBodyIntrinsic(int x, int y) { int result = 0; diff --git a/test/562-checker-no-intermediate/src/Main.java b/test/562-checker-no-intermediate/src/Main.java index d61a9b1d89..2b19918142 100644 --- a/test/562-checker-no-intermediate/src/Main.java +++ b/test/562-checker-no-intermediate/src/Main.java @@ -18,7 +18,7 @@ public class Main { /** * Check that the intermediate address computation is not reordered or merged - * across the call to Math.abs(). + * across a method call. */ /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (before) diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java index 45949ae90a..4e667bbc6f 100644 --- a/test/651-checker-byte-simd-minmax/src/Main.java +++ b/test/651-checker-byte-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -54,7 +54,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -74,7 +74,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -105,7 +105,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:a\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -125,7 +125,7 @@ public class Main { /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get>>,<<I100>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java index 9b056094a3..520e10b6c1 100644 --- a/test/651-checker-char-simd-minmax/src/Main.java +++ b/test/651-checker-char-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -43,7 +43,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -63,7 +63,7 @@ public class Main { /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get>>,<<I100>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java index 6b12e7e63c..2eaf907167 100644 --- a/test/651-checker-double-simd-minmax/src/Main.java +++ b/test/651-checker-double-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinDoubleDouble loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none // // TODO x86: 0.0 vs -0.0? @@ -45,7 +45,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none // // TODO x86: 0.0 vs -0.0? diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java index 278a9c9367..dc09dfc7cc 100644 --- a/test/651-checker-float-simd-minmax/src/Main.java +++ b/test/651-checker-float-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinFloatFloat loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:f\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none // // TODO x86: 0.0 vs -0.0? @@ -45,7 +45,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxFloatFloat loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:f\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none // // TODO x86: 0.0 vs -0.0? diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java index cfa0ae7dca..82fad84d08 100644 --- a/test/651-checker-int-simd-minmax/src/Main.java +++ b/test/651-checker-int-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none // /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMin(int[], int[], int[]) loop_optimization (after) @@ -42,7 +42,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none // /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMax(int[], int[], int[]) loop_optimization (after) diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java index 458cb8bf1b..f52686e54c 100644 --- a/test/651-checker-long-simd-minmax/src/Main.java +++ b/test/651-checker-long-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinLongLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:j\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none // // Not directly supported for longs. @@ -48,7 +48,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxLongLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:j\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none // // Not directly supported for longs. diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java index 5f10adab79..4300ca2951 100644 --- a/test/651-checker-short-simd-minmax/src/Main.java +++ b/test/651-checker-short-simd-minmax/src/Main.java @@ -23,7 +23,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -54,7 +54,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -74,7 +74,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -105,7 +105,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> Max [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // @@ -125,7 +125,7 @@ public class Main { /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> Min [<<Get>>,<<I100>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java index 3a0a0495c4..fcd50a6d15 100644 --- a/test/661-checker-simd-reduc/src/Main.java +++ b/test/661-checker-simd-reduc/src/Main.java @@ -378,7 +378,7 @@ public class Main { /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none - /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Min [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: Return [<<Phi2>>] loop:none // @@ -438,7 +438,7 @@ public class Main { /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none - /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Max [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: Return [<<Phi2>>] loop:none // |