author		2015-08-12 11:43:20 +0000
committer	2015-08-12 11:43:20 +0000
commit		6a5037eb3340e4c981fd7de3ff45167ee5b7fc82 (patch)
tree		2d7a59bfecc34628f3d1def79407b153650c1ac6 /compiler/optimizing
parent		2b562f400cf9717f197ae3ae1f9a32c96769ef06 (diff)
parent		cfa410b0ea561318f74a76c5323f0f6cd8eaaa50 (diff)
Merge "[optimizing] More x86_64 code improvements"
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--	compiler/optimizing/code_generator_x86_64.cc	58
-rw-r--r--	compiler/optimizing/code_generator_x86_64.h	3
-rw-r--r--	compiler/optimizing/intrinsics_x86_64.cc	40
3 files changed, 40 insertions(+), 61 deletions(-)
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6991414e62..1585104789 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -104,7 +104,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
       if (is_div_) {
         __ negl(cpu_reg_);
       } else {
-        __ movl(cpu_reg_, Immediate(0));
+        __ xorl(cpu_reg_, cpu_reg_);
       }
     } else {
@@ -749,8 +749,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
       }
-      Load64BitValue(CpuRegister(TMP), value);
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      Store64BitValueToStack(destination, value);
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -784,8 +783,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
     if (location.IsRegister()) {
       Load64BitValue(location.AsRegister<CpuRegister>(), value);
     } else if (location.IsDoubleStackSlot()) {
-      Load64BitValue(CpuRegister(TMP), value);
-      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+      Store64BitValueToStack(location, value);
     } else {
       DCHECK(location.IsConstant());
       DCHECK_EQ(location.GetConstant(), const_to_move);
@@ -1849,14 +1847,12 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
         // Processing a Dex `float-to-int' instruction.
         locations->SetInAt(0, Location::RequiresFpuRegister());
         locations->SetOut(Location::RequiresRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
         break;
 
       case Primitive::kPrimDouble:
         // Processing a Dex `double-to-int' instruction.
         locations->SetInAt(0, Location::RequiresFpuRegister());
         locations->SetOut(Location::RequiresRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
         break;
 
       default:
@@ -1884,14 +1880,12 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
         // Processing a Dex `float-to-long' instruction.
         locations->SetInAt(0, Location::RequiresFpuRegister());
         locations->SetOut(Location::RequiresRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
         break;
 
      case Primitive::kPrimDouble:
         // Processing a Dex `double-to-long' instruction.
         locations->SetInAt(0, Location::RequiresFpuRegister());
         locations->SetOut(Location::RequiresRegister());
-        locations->AddTemp(Location::RequiresFpuRegister());
         break;
 
       default:
@@ -2067,14 +2061,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
-          // temp = int-to-float(output)
-          __ cvtsi2ss(temp, output, false);
-          // if input >= temp goto done
-          __ comiss(input, temp);
+          // if input >= (float)INT_MAX goto done
+          __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
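Note on the hunk above: the comparison bound now comes from a (float)INT_MAX literal in the constant area instead of being materialized with cvtsi2ss, which is what lets the FP temporary go away. For readers following along, here is a minimal standalone C++ sketch (not from the patch; names are illustrative) of the saturating float-to-int semantics this comiss/j(kAboveEqual)/j(kUnordered) sequence implements:

#include <cmath>
#include <cstdint>
#include <limits>

// Saturating float-to-int: NaN -> 0, too large -> INT_MAX, too small -> INT_MIN.
int32_t SaturatingFloatToInt(float input) {
  constexpr int32_t kIntMax = std::numeric_limits<int32_t>::max();
  constexpr int32_t kIntMin = std::numeric_limits<int32_t>::min();
  if (std::isnan(input)) {
    return 0;  // the `nan` label: comiss raises the unordered flags for NaN
  }
  if (input >= static_cast<float>(kIntMax)) {
    return kIntMax;  // j(kAboveEqual, &done) keeps the preloaded maximum
  }
  if (input <= static_cast<float>(kIntMin)) {
    // The emitted code gets this case for free: cvttss2si produces
    // 0x80000000 (INT_MIN) for any out-of-range input.
    return kIntMin;
  }
  return static_cast<int32_t>(input);  // the cvttss2si fast path
}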
@@ -2092,14 +2083,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
-          // temp = int-to-double(output)
-          __ cvtsi2sd(temp, output);
-          // if input >= temp goto done
-          __ comisd(input, temp);
+          // if input >= (double)INT_MAX goto done
+          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -2137,14 +2125,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           // Processing a Dex `float-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
-          // temp = long-to-float(output)
-          __ cvtsi2ss(temp, output, true);
-          // if input >= temp goto done
-          __ comiss(input, temp);
+          // if input >= (float)LONG_MAX goto done
+          __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -2162,14 +2147,11 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           // Processing a Dex `double-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
-          // temp = long-to-double(output)
-          __ cvtsi2sd(temp, output, true);
-          // if input >= temp goto done
-          __ comisd(input, temp);
+          // if input >= (double)LONG_MAX goto done
+          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -4339,8 +4321,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
       codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
-      codegen_->Load64BitValue(CpuRegister(TMP), value);
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      codegen_->Store64BitValueToStack(destination, value);
     }
   } else if (constant->IsFloatConstant()) {
     float fp_value = constant->AsFloatConstant()->GetValue();
@@ -4371,8 +4352,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
       }
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
-      codegen_->Load64BitValue(CpuRegister(TMP), value);
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      codegen_->Store64BitValueToStack(destination, value);
     }
   }
 } else if (source.IsFpuRegister()) {
@@ -4874,6 +4854,18 @@ void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
   }
 }
 
+void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
+  DCHECK(dest.IsDoubleStackSlot());
+  if (IsInt<32>(value)) {
+    // Can move directly as an int32 constant.
+    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
+            Immediate(static_cast<int32_t>(value)));
+  } else {
+    Load64BitValue(CpuRegister(TMP), value);
+    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
+  }
+}
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
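The new Store64BitValueToStack helper above hinges on an encoding detail: movq to memory carries a 32-bit immediate that the CPU sign-extends to 64 bits, so only values that survive a round-trip through int32_t (what IsInt<32> checks) can skip the scratch register. A standalone C++ sketch of that predicate (not from the patch):

#include <cstdint>

// True when `value` can be encoded as movq's sign-extended imm32.
// -1 and 0x7FFFFFFF qualify for the direct store; 0x80000000 (2147483648)
// and larger do not, and go through Load64BitValue into TMP plus a
// register store, as in the else branch above.
bool FitsInSignExtendedImm32(int64_t value) {
  return value == static_cast<int64_t>(static_cast<int32_t>(value));
}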
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3b3915f2ae..41bebac240 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -322,6 +322,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   // Load a 64 bit value into a register in the most efficient manner.
   void Load64BitValue(CpuRegister dest, int64_t value);
 
+  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
+  void Store64BitValueToStack(Location dest, int64_t value);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 62cdb4c34a..85d40d7a0a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -282,8 +282,6 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
-  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
-  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
   locations->SetOut(Location::SameAsFirstInput());
   locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
 }
@@ -294,34 +292,18 @@ static void MathAbsFP(LocationSummary* locations,
                       CodeGeneratorX86_64* codegen) {
   Location output = locations->Out();
 
-  if (output.IsFpuRegister()) {
-    // In-register
-    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+  DCHECK(output.IsFpuRegister());
+  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
 
-    // TODO: Can mask directly with constant area using pand if we can guarantee
-    // that the literal is aligned on a 16 byte boundary.  This will avoid a
-    // temporary.
-    if (is64bit) {
-      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
-      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
-    } else {
-      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
-      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
-    }
+  // TODO: Can mask directly with constant area using pand if we can guarantee
+  // that the literal is aligned on a 16 byte boundary.  This will avoid a
+  // temporary.
+  if (is64bit) {
+    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
+    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
   } else {
-    // TODO: update when assember support is available.
-    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
-//  Once assembler support is available, in-memory operations look like this:
-//  if (is64bit) {
-//    DCHECK(output.IsDoubleStackSlot());
-//    // No 64b and with literal.
-//    __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
-//    __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
-//  } else {
-//    DCHECK(output.IsStackSlot());
-//    // Can use and with a literal directly.
-//    __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
-//  }
+    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
+    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
   }
 }
 
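The MathAbsFP rewrite above drops the unimplemented in-memory branch and keeps only the register path: load a sign-bit mask from the constant area, then andps/andpd it into the value. A standalone C++ sketch of the same bit trick (not from the patch):

#include <cstdint>
#include <cstring>

// abs() by clearing the IEEE-754 sign bit, as andps does with the
// 0x7FFFFFFF mask; this also handles -0.0f and preserves NaN payloads.
float AbsViaSignMask(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));  // reinterpret the float's bits
  bits &= UINT32_C(0x7FFFFFFF);          // clear only the sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

// 64-bit analogue of the andpd path with the 0x7FFFFFFFFFFFFFFF mask.
double AbsViaSignMask(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}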
@@ -736,6 +718,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
   codegen_->Load64BitValue(out, kPrimIntMax);
 
   // if inPlusPointFive >= maxInt goto done
+  __ movl(out, Immediate(kPrimIntMax));
   __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
   __ j(kAboveEqual, &done);
@@ -783,6 +766,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
   codegen_->Load64BitValue(out, kPrimLongMax);
 
   // if inPlusPointFive >= maxLong goto done
+  __ movq(out, Immediate(kPrimLongMax));
   __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
   __ j(kAboveEqual, &done);
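The two Math.round hunks above likewise compare inPlusPointFive against the saturation bound through a constant-area literal. For reference, a C++ sketch (not from the patch) of the Java Math.round(float) contract the float variant implements; it assumes, per the comments above, that the surrounding intrinsic code computes inPlusPointFive = input + 0.5f and floors it:

#include <cmath>
#include <cstdint>
#include <limits>

// Java Math.round(float): floor(x + 0.5f), saturated to int range, NaN -> 0.
int32_t RoundFloatSemantics(float input) {
  constexpr int32_t kIntMax = std::numeric_limits<int32_t>::max();
  constexpr int32_t kIntMin = std::numeric_limits<int32_t>::min();
  float in_plus_point_five = input + 0.5f;
  if (std::isnan(in_plus_point_five)) {
    return 0;
  }
  if (in_plus_point_five >= static_cast<float>(kIntMax)) {
    return kIntMax;  // the comiss + j(kAboveEqual, &done) path
  }
  if (in_plus_point_five <= static_cast<float>(kIntMin)) {
    return kIntMin;  // cvttss2si's out-of-range result in the emitted code
  }
  return static_cast<int32_t>(std::floor(in_plus_point_five));
}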