diff options
| author | 2014-11-27 12:06:00 +0000 | |
|---|---|---|
| committer | 2014-11-27 12:06:00 +0000 | |
| commit | 647b9ed41cdb7cf302fd356627a3ba372419b78c (patch) | |
| tree | f1ca054aa20ae4c489f208982e7a6cba5d5ee21e /compiler/optimizing | |
| parent | 35ecc8ca8fba713728b8fc60e9e2a275da2028aa (diff) | |
Add support for long-to-double in the optimizing compiler.
- Add support for the long-to-double Dex instruction in the
optimizing compiler.
- Enable requests of temporary FPU (double) registers during
code generation.
- Fix art::x86::X86Assembler::LoadLongConstant and extend
it to int64_t values.
- Have art::x86_64::X86_64Assembler::cvtsi2sd work with
64-bit operands.
- Generate x86, x86-64 and ARM (but not ARM64) code for
long-to-double HTypeConversion nodes.
- Add related tests to test/422-type-conversion.
Change-Id: Ie73d9e5e25bd2e15f585c371e8fc2dcb83438ccd
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/builder.cc | 5 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 22 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 44 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 39 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 11 |
5 files changed, 113 insertions, 8 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index b261460690..35360fef6c 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1034,6 +1034,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::LONG_TO_DOUBLE: { + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble); + break; + } + case Instruction::INT_TO_BYTE: { Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte); break; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0b593275c7..6cacd4fa23 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -228,7 +228,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { DCHECK(!blocked_fpu_registers_[loc.reg()]); blocked_fpu_registers_[loc.reg()] = true; } else { - DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister); + DCHECK(loc.GetPolicy() == Location::kRequiresRegister + || loc.GetPolicy() == Location::kRequiresFpuRegister); } } @@ -259,10 +260,21 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) { Location loc = locations->GetTemp(i); if (loc.IsUnallocated()) { - DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister); - // TODO: Adjust handling of temps. We currently consider temps to use - // core registers. They may also use floating point registers at some point. - loc = AllocateFreeRegister(Primitive::kPrimInt); + switch (loc.GetPolicy()) { + case Location::kRequiresRegister: + // Allocate a core register (large enough to fit a 32-bit integer). + loc = AllocateFreeRegister(Primitive::kPrimInt); + break; + + case Location::kRequiresFpuRegister: + // Allocate a core register (large enough to fit a 64-bit double). 
+ loc = AllocateFreeRegister(Primitive::kPrimDouble); + break; + + default: + LOG(FATAL) << "Unexpected policy for temporary location " + << loc.GetPolicy(); + } locations->SetTempAt(i, loc); } } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 890cfdd0e6..22a859071b 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1481,6 +1481,14 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1645,7 +1653,41 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio break; } - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + // Processing a Dex `long-to-double' instruction. + Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + SRegister out_s = out.AsFpuRegisterPairLow<SRegister>(); + DRegister out_d = FromLowSToD(out_s); + Register constant_low = locations->GetTemp(0).As<Register>(); + Register constant_high = locations->GetTemp(1).As<Register>(); + SRegister temp_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>(); + DRegister temp_d = FromLowSToD(temp_s); + + // Binary encoding of 2^32 for type double. 
+ const uint64_t c = UINT64_C(0x41F0000000000000); + + // out_d = int-to-double(high) + __ vmovsr(out_s, high); + __ vcvtdi(out_d, out_s); + // Using vmovd to load the `c` constant as an immediate + // value into `temp_d` does not work, as this instruction + // only transfers 8 significant bits of its immediate + // operand. Instead, use two 32-bit core registers to + // load `c` into `temp_d`. + __ LoadImmediate(constant_low, Low32Bits(c)); + __ LoadImmediate(constant_high, High32Bits(c)); + __ vmovdrr(temp_d, constant_low, constant_high); + // out_d = out_d * 2^32 + __ vmuld(out_d, out_d, temp_d); + // temp_d = unsigned-to-double(low) + __ vmovsr(temp_s, low); + __ vcvtdu(temp_d, temp_s); + // out_d = out_d + temp_d + __ vaddd(out_d, out_d, temp_d); + break; + } + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 3689452234..c482885b02 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1429,6 +1429,13 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1608,7 +1615,37 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ cvtsi2sd(out.As<XmmRegister>(), in.As<Register>()); break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + // Processing a Dex `long-to-double' instruction. 
+ Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + XmmRegister result = out.As<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).As<XmmRegister>(); + XmmRegister constant = locations->GetTemp(1).As<XmmRegister>(); + + // Binary encoding of 2^32 for type double. + const int64_t c1 = INT64_C(0x41F0000000000000); + // Binary encoding of 2^31 for type double. + const int64_t c2 = INT64_C(0x41E0000000000000); + + // low = low - 2^31 (to prevent bit 31 of `low` to be + // interpreted as a sign bit) + __ subl(low, Immediate(0x80000000)); + // temp = int-to-double(high) + __ cvtsi2sd(temp, high); + // temp = temp * 2^32 + __ LoadLongConstant(constant, c1); + __ mulsd(temp, constant); + // result = int-to-double(low) + __ cvtsi2sd(result, low); + // result = result + 2^31 (restore the original value of `low`) + __ LoadLongConstant(constant, c2); + __ addsd(result, constant); + // result = result + temp + __ addsd(result, temp); + break; + } + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 34fa1e7a3b..63938f3fa8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1430,6 +1430,11 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1609,10 +1614,14 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimChar: - __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>()); + __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>(), false); break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. + __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>(), true); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; |