diff options
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 75 | ||||
| -rw-r--r-- | compiler/optimizing/pc_relative_fixups_x86.cc | 1 | ||||
| -rw-r--r-- | test/580-checker-round/src/Main.java | 48 |
3 files changed, 79 insertions, 45 deletions
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 65f4def48b..83cc278576 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -752,20 +752,20 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); } -// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, -// as it needs 64 bit instructions. void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } - // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); + DCHECK(static_or_direct != nullptr); LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); + if (static_or_direct->HasSpecialInput() && + invoke->InputAt( + static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -774,7 +774,7 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { // We have to fall back to a call to the intrinsic. LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); @@ -784,47 +784,42 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); - if (locations->WillCall()) { + if (locations->WillCall()) { // TODO: can we reach this? InvokeOutOfLineIntrinsic(codegen_, invoke); return; } - // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + Register constant_area = locations->InAt(1).AsRegister<Register>(); + XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); Register out = locations->Out().AsRegister<Register>(); - XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - NearLabel done, nan; + NearLabel skip_incr, done; X86Assembler* assembler = GetAssembler(); - // Generate 0.5 into inPlusPointFive. - __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); - __ movd(inPlusPointFive, out); - - // Add in the input. - __ addss(inPlusPointFive, in); - - // And truncate to an integer. - __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); - + // Since no direct x86 rounding instruction matches the required semantics, + // this intrinsic is implemented as follows: + // result = floor(in); + // if (in - result >= 0.5f) + // result = result + 1.0f; + __ movss(t2, in); + __ roundss(t1, in, Immediate(1)); + __ subss(t2, t1); + __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); + __ Bind(&skip_incr); + + // Final conversion to an integer. Unfortunately this also does not have a + // direct x86 instruction, since NaN should map to 0 and large positive + // values need to be clipped to the extreme value. __ movl(out, Immediate(kPrimIntMax)); - // maxInt = int-to-float(out) - __ cvtsi2ss(maxInt, out); - - // if inPlusPointFive >= maxInt goto done - __ comiss(inPlusPointFive, maxInt); - __ j(kAboveEqual, &done); - - // if input == NaN goto nan - __ j(kUnordered, &nan); - - // output = float-to-int-truncate(input) - __ cvttss2si(out, inPlusPointFive); - __ jmp(&done); - __ Bind(&nan); - - // output = 0 - __ xorl(out, out); + __ cvtsi2ss(t2, out); + __ comiss(t1, t2); + __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered + __ movl(out, Immediate(0)); // does not change flags + __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) + __ cvttss2si(out, t1); __ Bind(&done); } diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 921f3dfff6..ad0921d7e6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -227,6 +227,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMinDoubleDouble: case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathRoundFloat: if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java index 9e248ef95a..83bc55c480 100644 --- a/test/580-checker-round/src/Main.java +++ b/test/580-checker-round/src/Main.java @@ -36,7 +36,8 @@ public class Main { expectEquals32(-2, round32(-1.51f)); expectEquals32(-1, round32(-1.2f)); expectEquals32(-1, round32(-1.0f)); - expectEquals32(-1, round32(-0.51f)); + expectEquals32(-1, round32(-0.5000001f)); + expectEquals32(0, round32(-0.5f)); expectEquals32(0, round32(-0.2f)); expectEquals32(0, round32(-0.0f)); expectEquals32(0, round32(+0.0f)); @@ -47,11 +48,23 @@ public class Main { expectEquals32(2, round32(+1.5f)); expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY)); + // Near minint. + expectEquals32(-2147483648, round32(Math.nextAfter(-2147483648.0f, Float.NEGATIVE_INFINITY))); + expectEquals32(-2147483648, round32(-2147483648.0f)); + expectEquals32(-2147483520, round32(Math.nextAfter(-2147483648.0f, Float.POSITIVE_INFINITY))); + + // Near maxint. + expectEquals32(2147483520, round32(Math.nextAfter(2147483648.0f, Float.NEGATIVE_INFINITY))); + expectEquals32(2147483647, round32(2147483648.0f)); + expectEquals32(2147483647, round32(Math.nextAfter(2147483648.0f, Float.POSITIVE_INFINITY))); + // Some others. for (int i = -100; i <= 100; ++i) { expectEquals32(i - 1, round32((float) i - 0.51f)); + expectEquals32(i, round32((float) i - 0.5f)); expectEquals32(i, round32((float) i)); expectEquals32(i + 1, round32((float) i + 0.5f)); + expectEquals32(i + 1, round32((float) i + 0.51f)); } for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) { expectEquals32(-1, round32(f)); @@ -61,8 +74,10 @@ public class Main { float[] fvals = { -16777215.5f, -16777215.0f, - -0.4999f, - 0.4999f, + -0.49999998f, + -0.4999999701976776123046875f, + 0.4999999701976776123046875f, + 0.49999998f, 16777215.0f, 16777215.5f }; @@ -71,6 +86,8 @@ public class Main { -16777215, 0, 0, + 0, + 0, 16777215, 16777216 }; @@ -98,7 +115,8 @@ public class Main { expectEquals64(-2L, round64(-1.51d)); expectEquals64(-1L, round64(-1.2d)); expectEquals64(-1L, round64(-1.0d)); - expectEquals64(-1L, round64(-0.51d)); + expectEquals64(-1L, round64(-0.5000001f)); + expectEquals64(0L, round64(-0.5d)); expectEquals64(0L, round64(-0.2d)); expectEquals64(0L, round64(-0.0d)); expectEquals64(0L, round64(+0.0d)); @@ -109,11 +127,27 @@ public class Main { expectEquals64(2L, round64(+1.5d)); expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY)); + // Near minlong. + expectEquals64(-9223372036854775808L, + round64(Math.nextAfter(-9223372036854775808.0, Double.NEGATIVE_INFINITY))); + expectEquals64(-9223372036854775808L, round64(-9223372036854775808.0)); + expectEquals64(-9223372036854774784L, + round64(Math.nextAfter(-9223372036854775809.0, Double.POSITIVE_INFINITY))); + + // Near maxlong. + expectEquals64(9223372036854774784L, + round64(Math.nextAfter(9223372036854775808.0, Double.NEGATIVE_INFINITY))); + expectEquals64(9223372036854775807L, round64(9223372036854775808.0)); + expectEquals64(9223372036854775807L, + round64(Math.nextAfter(9223372036854775808.0, Double.POSITIVE_INFINITY))); + // Some others. for (long l = -100; l <= 100; ++l) { expectEquals64(l - 1, round64((double) l - 0.51d)); + expectEquals64(l, round64((double) l - 0.5d)); + expectEquals64(l, round64((double) l)); expectEquals64(l + 1, round64((double) l + 0.5d)); - expectEquals64(l + 1, round64((double) l + 0.5d)); + expectEquals64(l + 1, round64((double) l + 0.51d)); } for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) { expectEquals64(-1L, round64(d)); @@ -123,8 +157,10 @@ public class Main { double[] dvals = { -9007199254740991.5d, -9007199254740991.0d, + -0.49999999999999997d, -0.49999999999999994d, 0.49999999999999994d, + 0.49999999999999997d, 9007199254740991.0d, 9007199254740991.5d }; @@ -133,6 +169,8 @@ public class Main { -9007199254740991L, 0L, 0L, + 0L, + 0L, 9007199254740991L, 9007199254740992L }; |