diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 114 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 3 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 125 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 3 |
4 files changed, 199 insertions, 46 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 5b09fc190b..57f01e8e16 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -40,6 +40,8 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength =
 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
 static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
+static constexpr int kC2ConditionMask = 0x400;
+
 // Marker for places that can be updated once we don't follow the quick ABI.
 static constexpr bool kFollowsQuickABI = true;
 
@@ -2076,6 +2078,81 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
   }
 }
 
+void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset,
+                                                  uint32_t stack_adjustment, bool is_float) {
+  if (source.IsStackSlot()) {
+    DCHECK(is_float);
+    __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+  } else if (source.IsDoubleStackSlot()) {
+    DCHECK(!is_float);
+    __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+  } else {
+    // Write the value to the temporary location on the stack and load to FP stack.
+    if (is_float) {
+      Location stack_temp = Location::StackSlot(temp_offset);
+      codegen_->Move32(stack_temp, source);
+      __ flds(Address(ESP, temp_offset));
+    } else {
+      Location stack_temp = Location::DoubleStackSlot(temp_offset);
+      codegen_->Move64(stack_temp, source);
+      __ fldl(Address(ESP, temp_offset));
+    }
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
+  Primitive::Type type = rem->GetResultType();
+  bool is_float = type == Primitive::kPrimFloat;
+  size_t elem_size = Primitive::ComponentSize(type);
+  LocationSummary* locations = rem->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  // Create stack space for 2 elements.
+  // TODO: enhance register allocator to ask for stack temporaries.
+  __ subl(ESP, Immediate(2 * elem_size));
+
+  // Load the values to the FP stack in reverse order, using temporaries if needed.
+  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
+  PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+
+  // Loop doing FPREM until we stabilize.
+  Label retry;
+  __ Bind(&retry);
+  __ fprem();
+
+  // Move FP status to AX.
+  __ fstsw();
+
+  // And see if the argument reduction is complete. This is signaled by the
+  // C2 FPU flag bit set to 0.
+  __ andl(EAX, Immediate(kC2ConditionMask));
+  __ j(kNotEqual, &retry);
+
+  // We have settled on the final value. Retrieve it into an XMM register.
+  // Store FP top of stack to real stack.
+  if (is_float) {
+    __ fsts(Address(ESP, 0));
+  } else {
+    __ fstl(Address(ESP, 0));
+  }
+
+  // Pop the 2 items from the FP stack.
+  __ fucompp();
+
+  // Load the value from the stack into an XMM register.
+  DCHECK(out.IsFpuRegister()) << out;
+  if (is_float) {
+    __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+  } else {
+    __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+  }
+
+  // And remove the temporary stack space we allocated.
+  __ addl(ESP, Immediate(2 * elem_size));
+}
+
 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
 
@@ -2209,10 +2286,8 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
 
 void LocationsBuilderX86::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
-  LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
-      ? LocationSummary::kNoCall
-      : LocationSummary::kCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
 
   switch (type) {
     case Primitive::kPrimInt: {
@@ -2231,24 +2306,12 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
       break;
     }
+    case Primitive::kPrimDouble:
     case Primitive::kPrimFloat: {
-      InvokeRuntimeCallingConvention calling_convention;
-      // x86 floating-point parameters are passed through core registers (EAX, ECX).
-      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-      locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-      // The runtime helper puts the result in XMM0.
-      locations->SetOut(Location::FpuRegisterLocation(XMM0));
-      break;
-    }
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX).
-      locations->SetInAt(0, Location::RegisterPairLocation(
-          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
-      locations->SetInAt(1, Location::RegisterPairLocation(
-          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
-      // The runtime helper puts the result in XMM0.
-      locations->SetOut(Location::FpuRegisterLocation(XMM0));
+      locations->SetInAt(0, Location::Any());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::RequiresFpuRegister());
+      locations->AddTemp(Location::RegisterLocation(EAX));
       break;
     }
 
@@ -2265,14 +2328,9 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
       GenerateDivRemIntegral(rem);
       break;
     }
-    case Primitive::kPrimFloat: {
-      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf)));
-      codegen_->RecordPcInfo(rem, rem->GetDexPc());
-      break;
-    }
+    case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
-      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod)));
-      codegen_->RecordPcInfo(rem, rem->GetDexPc());
+      GenerateRemFP(rem);
       break;
     }
     default:
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index b77a1aa856..a9086f8876 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -137,6 +137,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void GenerateRemFP(HRem *rem);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -144,6 +145,8 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void PushOntoFPStack(Location source, uint32_t temp_offset,
+                       uint32_t stack_adjustment, bool is_float);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 196e0cf666..dd6861f67b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -47,6 +47,8 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
+static constexpr int kC2ConditionMask = 0x400;
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -583,8 +585,18 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
     } else if (source.IsFpuRegister()) {
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      int32_t value;
+      if (constant->IsFloatConstant()) {
+        value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      } else {
+        DCHECK(constant->IsIntConstant());
+        value = constant->AsIntConstant()->GetValue();
+      }
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
     } else {
-      DCHECK(source.IsStackSlot());
+      DCHECK(source.IsStackSlot()) << source;
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
@@ -596,6 +608,17 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
     } else if (source.IsFpuRegister()) {
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
+    } else if (source.IsConstant()) {
+      HConstant* constant = source.GetConstant();
+      int64_t value;  // Set per constant kind below; no As*Constant() call before the check.
+      if (constant->IsDoubleConstant()) {
+        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+      } else {
+        DCHECK(constant->IsLongConstant());
+        value = constant->AsLongConstant()->GetValue();
+      }
+      __ movq(CpuRegister(TMP), Immediate(value));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -2000,6 +2023,81 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   }
 }
 
+void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
+                                                     uint32_t stack_adjustment, bool is_float) {
+  if (source.IsStackSlot()) {
+    DCHECK(is_float);
+    __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
+  } else if (source.IsDoubleStackSlot()) {
+    DCHECK(!is_float);
+    __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
+  } else {
+    // Write the value to the temporary location on the stack and load to FP stack.
+    if (is_float) {
+      Location stack_temp = Location::StackSlot(temp_offset);
+      codegen_->Move(stack_temp, source);
+      __ flds(Address(CpuRegister(RSP), temp_offset));
+    } else {
+      Location stack_temp = Location::DoubleStackSlot(temp_offset);
+      codegen_->Move(stack_temp, source);
+      __ fldl(Address(CpuRegister(RSP), temp_offset));
+    }
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
+  Primitive::Type type = rem->GetResultType();
+  bool is_float = type == Primitive::kPrimFloat;
+  size_t elem_size = Primitive::ComponentSize(type);
+  LocationSummary* locations = rem->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  // Create stack space for 2 elements.
+  // TODO: enhance register allocator to ask for stack temporaries.
+  __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
+
+  // Load the values to the FP stack in reverse order, using temporaries if needed.
+  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
+  PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+
+  // Loop doing FPREM until we stabilize.
+  Label retry;
+  __ Bind(&retry);
+  __ fprem();
+
+  // Move FP status to AX.
+  __ fstsw();
+
+  // And see if the argument reduction is complete. This is signaled by the
+  // C2 FPU flag bit set to 0.
+  __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
+  __ j(kNotEqual, &retry);
+
+  // We have settled on the final value. Retrieve it into an XMM register.
+  // Store FP top of stack to real stack.
+  if (is_float) {
+    __ fsts(Address(CpuRegister(RSP), 0));
+  } else {
+    __ fstl(Address(CpuRegister(RSP), 0));
+  }
+
+  // Pop the 2 items from the FP stack.
+  __ fucompp();
+
+  // Load the value from the stack into an XMM register.
+  DCHECK(out.IsFpuRegister()) << out;
+  if (is_float) {
+    __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
+  } else {
+    __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
+  }
+
+  // And remove the temporary stack space we allocated.
+  __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
+}
+
 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
@@ -2099,11 +2197,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
 
 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
-  LocationSummary::CallKind call_kind =
-      (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)
-      ? LocationSummary::kNoCall
-      : LocationSummary::kCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
 
   switch (type) {
     case Primitive::kPrimInt:
@@ -2117,11 +2212,10 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) {
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      // The runtime helper puts the result in XMM0.
-      locations->SetOut(Location::FpuRegisterLocation(XMM0));
+      locations->SetInAt(0, Location::Any());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::RequiresFpuRegister());
+      locations->AddTemp(Location::RegisterLocation(RAX));
       break;
     }
 
@@ -2138,14 +2232,9 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
       GenerateDivRemIntegral(rem);
       break;
     }
-    case Primitive::kPrimFloat: {
-      __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true));
-      codegen_->RecordPcInfo(rem, rem->GetDexPc());
-      break;
-    }
+    case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
-      __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true));
-      codegen_->RecordPcInfo(rem, rem->GetDexPc());
+      GenerateRemFP(rem);
       break;
     }
     default:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index befe994efb..ead771a1f2 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -155,6 +155,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void GenerateRemFP(HRem *rem);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -162,6 +163,8 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
+  void PushOntoFPStack(Location source, uint32_t temp_offset,
+                       uint32_t stack_adjustment, bool is_float);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;