Optimizing: Improve floating-point comparisons on ARM and ARM64.
Avoid the extra check for unordered inputs by using the
appropriate ARM/ARM64 condition code.
Change-Id: Ib5e775a90428db7a2cf377ad9fd6a3192d670617
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9..53790b9 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -728,6 +728,24 @@
UNREACHABLE();
}
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+ // The ARM condition codes can express all the necessary branches, see the
+ // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return EQ;
+ case kCondNE: return NE /* unordered */;
+ case kCondLT: return gt_bias ? CC : LT /* unordered */;
+ case kCondLE: return gt_bias ? LS : LE /* unordered */;
+ case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+ case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Register(reg);
}
@@ -1416,15 +1434,9 @@
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
- Label* false_label) {
+ Label* false_label ATTRIBUTE_UNUSED) {
__ vmstat(); // transfer FP status register to ARM APSR.
- // TODO: merge into a single branch (except "equal or unordered" and "not equal")
- if (cond->IsFPConditionTrueIfNaN()) {
- __ b(true_label, VS); // VS for unordered.
- } else if (cond->IsFPConditionFalseIfNaN()) {
- __ b(false_label, VS); // VS for unordered.
- }
- __ b(true_label, ARMCondition(cond->GetCondition()));
+ __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
}
void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -3803,6 +3815,7 @@
Label less, greater, done;
Primitive::Type type = compare->InputAt(0)->GetType();
+ Condition less_cond;
switch (type) {
case Primitive::kPrimLong: {
__ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3826,7 @@
__ LoadImmediate(out, 0);
__ cmp(left.AsRegisterPairLow<Register>(),
ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare.
+ less_cond = LO;
break;
}
case Primitive::kPrimFloat:
@@ -3825,14 +3839,15 @@
FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
}
__ vmstat(); // transfer FP status register to ARM APSR.
- __ b(compare->IsGtBias() ? &greater : &less, VS); // VS for unordered.
+ less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
}
default:
LOG(FATAL) << "Unexpected compare type " << type;
+ UNREACHABLE();
}
__ b(&done, EQ);
- __ b(&less, LO); // LO is for both: unsigned compare for longs and 'less than' for floats.
+ __ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6ed2c5a..55fa1fa 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@
UNREACHABLE();
}
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+ // The ARM64 condition codes can express all the necessary branches, see the
+ // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne /* unordered */;
+ case kCondLT: return gt_bias ? cc : lt /* unordered */;
+ case kCondLE: return gt_bias ? ls : le /* unordered */;
+ case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+ case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
Location ARM64ReturnLocation(Primitive::Type return_type) {
// Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
// same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -2381,12 +2399,8 @@
} else {
__ Fcmp(left, InputFPRegisterAt(compare, 1));
}
- if (compare->IsGtBias()) {
- __ Cset(result, ne);
- } else {
- __ Csetm(result, ne);
- }
- __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+ __ Cset(result, ne);
+ __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
break;
}
default:
@@ -2422,7 +2436,6 @@
LocationSummary* locations = instruction->GetLocations();
Register res = RegisterFrom(locations->Out(), instruction->GetType());
IfCondition if_cond = instruction->GetCondition();
- Condition arm64_cond = ARM64Condition(if_cond);
if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2433,20 +2446,13 @@
} else {
__ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
}
- __ Cset(res, arm64_cond);
- if (instruction->IsFPConditionTrueIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 1 : res <=> res = IsNotUnordered(arm64_cond) ? res : 1
- __ Csel(res, res, Operand(1), vc); // VC for "not unordered".
- } else if (instruction->IsFPConditionFalseIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 0 : res <=> res = IsNotUnordered(arm64_cond) ? res : 0
- __ Csel(res, res, Operand(0), vc); // VC for "not unordered".
- }
+ __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
} else {
// Integer cases.
Register lhs = InputRegisterAt(instruction, 0);
Operand rhs = InputOperandAt(instruction, 1);
__ Cmp(lhs, rhs);
- __ Cset(res, arm64_cond);
+ __ Cset(res, ARM64Condition(if_cond));
}
}
@@ -2816,15 +2822,11 @@
} else {
__ Fcmp(lhs, InputFPRegisterAt(condition, 1));
}
- if (condition->IsFPConditionTrueIfNaN()) {
- __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
- } else if (condition->IsFPConditionFalseIfNaN()) {
- __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
- }
if (true_target == nullptr) {
- __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+ IfCondition opposite_condition = condition->GetOppositeCondition();
+ __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
} else {
- __ B(ARM64Condition(condition->GetCondition()), true_target);
+ __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
}
} else {
// Integer cases.