diff options
| author | 2015-04-07 17:04:50 +0600 | |
|---|---|---|
| committer | 2015-04-13 11:30:02 +0000 | |
| commit | 2d45b4df3838d9c0e5a213305ccd1d7009e01437 (patch) | |
| tree | b3893899a540ba9f4c8cd70e69536d0239a9d3ef | |
| parent | 1576be32be4a99a1cffdaaf209a3cd67e8b2f88a (diff) | |
Optimizing: Fix long-to-fp conversion on x86.
long-to-fp conversion implemented using SSE loses the precision.
The test is included. CL uses FPU to provide the correct result.
Change-Id: I8eaf3c46819a8cb52642a7e7d7c4e3e0edbc88db
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 158 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4 | ||||
| -rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 7 | ||||
| -rw-r--r-- | compiler/utils/x86/assembler_x86.h | 1 | ||||
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 7 | ||||
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 1 | ||||
| -rw-r--r-- | test/422-type-conversion/src/Main.java | 1 |
7 files changed, 105 insertions, 74 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 92b62e2c84..ef896aa949 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1525,10 +1525,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-float' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimDouble: @@ -1558,10 +1556,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: // Processing a Dex `long-to-double' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); - locations->AddTemp(Location::RequiresFpuRegister()); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::Any()); break; case Primitive::kPrimFloat: @@ -1782,37 +1778,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-float' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // Operations use doubles for precision reasons (each 32-bit - // half of a long fits in the 53-bit mantissa of a double, - // but not in the 24-bit mantissa of a float). This is - // especially important for the low bits. The result is - // eventually converted to float. - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // result = double-to-float(result) - __ cvtsd2ss(result, result); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsStackSlot()) { + __ fstps(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstps(Address(ESP, 0)); + Location stack_temp = Location::StackSlot(0); + codegen_->Move32(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -1841,29 +1831,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: { // Processing a Dex `long-to-double' instruction. - Register low = in.AsRegisterPairLow<Register>(); - Register high = in.AsRegisterPairHigh<Register>(); - XmmRegister result = out.AsFpuRegister<XmmRegister>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - - // low = low - 2^31 (to prevent bit 31 of `low` to be - // interpreted as a sign bit) - __ subl(low, Immediate(0x80000000)); - // temp = int-to-double(high) - __ cvtsi2sd(temp, high); - // temp = temp * 2^32 - __ LoadLongConstant(constant, k2Pow32EncodingForDouble); - __ mulsd(temp, constant); - // result = int-to-double(low) - __ cvtsi2sd(result, low); - // result = result + 2^31 (restore the original value of `low`) - __ LoadLongConstant(constant, k2Pow31EncodingForDouble); - __ addsd(result, constant); - // result = result + temp - __ addsd(result, temp); - // Restore low. - __ addl(low, Immediate(0x80000000)); + size_t adjustment = 0; + + // Create stack space for the call to + // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. + // TODO: enhance register allocator to ask for stack temporaries. + if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { + adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + __ subl(ESP, Immediate(adjustment)); + } + + // Load the value to the FP stack, using temporaries if needed. + PushOntoFPStack(in, 0, adjustment, false, true); + + if (out.IsDoubleStackSlot()) { + __ fstpl(Address(ESP, out.GetStackIndex() + adjustment)); + } else { + __ fstpl(Address(ESP, 0)); + Location stack_temp = Location::DoubleStackSlot(0); + codegen_->Move64(out, stack_temp); + } + + // Remove the temporary stack space we allocated. + if (adjustment != 0) { + __ addl(ESP, Immediate(adjustment)); + } break; } @@ -2203,24 +2195,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } } -void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float) { +void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, + uint32_t temp_offset, + uint32_t stack_adjustment, + bool is_fp, + bool is_wide) { if (source.IsStackSlot()) { - DCHECK(is_float); - __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(!is_wide); + if (is_fp) { + __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else if (source.IsDoubleStackSlot()) { - DCHECK(!is_float); - __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + DCHECK(is_wide); + if (is_fp) { + __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } else { + __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment)); + } } else { // Write the value to the temporary location on the stack and load to FP stack. - if (is_float) { + if (!is_wide) { Location stack_temp = Location::StackSlot(temp_offset); codegen_->Move32(stack_temp, source); - __ flds(Address(ESP, temp_offset)); + if (is_fp) { + __ flds(Address(ESP, temp_offset)); + } else { + __ filds(Address(ESP, temp_offset)); + } } else { Location stack_temp = Location::DoubleStackSlot(temp_offset); codegen_->Move64(stack_temp, source); - __ fldl(Address(ESP, temp_offset)); + if (is_fp) { + __ fldl(Address(ESP, temp_offset)); + } else { + __ fildl(Address(ESP, temp_offset)); + } } } } @@ -2239,8 +2250,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ subl(ESP, Immediate(2 * elem_size)); // Load the values to the FP stack in reverse order, using temporaries if needed. - PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); - PushOntoFPStack(first, 0, 2 * elem_size, is_float); + const bool is_wide = !is_float; + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); // Loop doing FPREM until we stabilize. Label retry; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0cc3c6533a..1cb9e326ce 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -171,8 +171,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. + // `is_wide` specifies whether it is long/double or not. void PushOntoFPStack(Location source, uint32_t temp_offset, - uint32_t stack_adjustment, bool is_float); + uint32_t stack_adjustment, bool is_fp, bool is_wide); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 4cca529258..7b206a3ffd 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -883,6 +883,13 @@ void X86Assembler::fildl(const Address& src) { } +void X86Assembler::filds(const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDB); + EmitOperand(0, src); +} + + void X86Assembler::fincstp() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index f3675aeceb..a933474a39 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -349,6 +349,7 @@ class X86Assembler FINAL : public Assembler { void fistpl(const Address& dst); void fistps(const Address& dst); void fildl(const Address& src); + void filds(const Address& src); void fincstp(); void ffree(const Immediate& index); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 2e0d9e1840..699b0b22f7 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -984,6 +984,13 @@ void X86_64Assembler::fildl(const Address& src) { } +void X86_64Assembler::filds(const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xDB); + EmitOperand(0, src); +} + + void X86_64Assembler::fincstp() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xD9); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index a786a6cbff..3df095091a 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -389,6 +389,7 @@ class X86_64Assembler FINAL : public Assembler { void fistpl(const Address& dst); void fistps(const Address& dst); void fildl(const Address& src); + void filds(const Address& src); void fincstp(); void ffree(const Immediate& index); diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java index 7ce2868283..da5bd766a3 100644 --- a/test/422-type-conversion/src/Main.java +++ b/test/422-type-conversion/src/Main.java @@ -321,6 +321,7 @@ public class Main { assertFloatEquals(9223372036854775807F, $opt$LongToFloat(9223372036854775807L)); // 2^63 - 1 assertFloatEquals(-9223372036854775807F, $opt$LongToFloat(-9223372036854775807L)); // -(2^63 - 1) assertFloatEquals(-9223372036854775808F, $opt$LongToFloat(-9223372036854775808L)); // -(2^63) + assertFloatEquals(Float.intBitsToFloat(-555858671), $opt$LongToFloat(-8008112895877447681L)); } private static void longToDouble() { |