Merge "[optimizing] More x86_64 code improvements"
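
The hunks below drop the FPU temporary from the float/double-to-int/long
conversions by comparing directly against (float)/(double) bounds kept in the
constant area, prefer xorl over movl with a zero immediate, and add a
Store64BitValueToStack helper so 64-bit constants that fit a sign-extended
imm32 can be stored with one movq. As a reading aid, here is a rough
standalone C++ sketch (not ART code; the function name is mine) of the
saturating float-to-int semantics that the generated movl/comiss/cvttss2si
sequence implements; the patch only changes how the bound is materialized,
not these semantics:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t SaturatingFloatToInt(float input) {
      constexpr int32_t kPrimIntMax = std::numeric_limits<int32_t>::max();
      constexpr int32_t kPrimIntMin = std::numeric_limits<int32_t>::min();
      if (std::isnan(input)) return 0;                // kUnordered branch
      if (input >= static_cast<float>(kPrimIntMax)) {
        return kPrimIntMax;                           // kAboveEqual branch; out is preloaded
      }
      if (input <= static_cast<float>(kPrimIntMin)) {
        return kPrimIntMin;                           // cvttss2si yields 0x80000000 here
      }
      return static_cast<int32_t>(input);             // plain cvttss2si truncation
    }
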
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6991414..1585104 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -104,7 +104,7 @@
if (is_div_) {
__ negl(cpu_reg_);
} else {
- __ movl(cpu_reg_, Immediate(0));
+ __ xorl(cpu_reg_, cpu_reg_);
}
} else {
@@ -749,8 +749,7 @@
DCHECK(constant->IsLongConstant());
value = constant->AsLongConstant()->GetValue();
}
- Load64BitValue(CpuRegister(TMP), value);
- __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+ Store64BitValueToStack(destination, value);
} else {
DCHECK(source.IsDoubleStackSlot());
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -784,8 +783,7 @@
if (location.IsRegister()) {
Load64BitValue(location.AsRegister<CpuRegister>(), value);
} else if (location.IsDoubleStackSlot()) {
- Load64BitValue(CpuRegister(TMP), value);
- __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+ Store64BitValueToStack(location, value);
} else {
DCHECK(location.IsConstant());
DCHECK_EQ(location.GetConstant(), const_to_move);
@@ -1849,14 +1847,12 @@
// Processing a Dex `float-to-int' instruction.
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-int' instruction.
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -1884,14 +1880,12 @@
// Processing a Dex `float-to-long' instruction.
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-long' instruction.
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -2067,14 +2061,11 @@
// Processing a Dex `float-to-int' instruction.
XmmRegister input = in.AsFpuRegister<XmmRegister>();
CpuRegister output = out.AsRegister<CpuRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
__ movl(output, Immediate(kPrimIntMax));
- // temp = int-to-float(output)
- __ cvtsi2ss(temp, output, false);
- // if input >= temp goto done
- __ comiss(input, temp);
+ // if input >= (float)INT_MAX goto done
+ __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
__ j(kUnordered, &nan);
@@ -2092,14 +2083,11 @@
// Processing a Dex `double-to-int' instruction.
XmmRegister input = in.AsFpuRegister<XmmRegister>();
CpuRegister output = out.AsRegister<CpuRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
__ movl(output, Immediate(kPrimIntMax));
- // temp = int-to-double(output)
- __ cvtsi2sd(temp, output);
- // if input >= temp goto done
- __ comisd(input, temp);
+ // if input >= (double)INT_MAX goto done
+ __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
__ j(kUnordered, &nan);
@@ -2137,14 +2125,11 @@
// Processing a Dex `float-to-long' instruction.
XmmRegister input = in.AsFpuRegister<XmmRegister>();
CpuRegister output = out.AsRegister<CpuRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
codegen_->Load64BitValue(output, kPrimLongMax);
- // temp = long-to-float(output)
- __ cvtsi2ss(temp, output, true);
- // if input >= temp goto done
- __ comiss(input, temp);
+ // if input >= (float)LONG_MAX goto done
+ __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
__ j(kUnordered, &nan);
@@ -2162,14 +2147,11 @@
// Processing a Dex `double-to-long' instruction.
XmmRegister input = in.AsFpuRegister<XmmRegister>();
CpuRegister output = out.AsRegister<CpuRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
codegen_->Load64BitValue(output, kPrimLongMax);
- // temp = long-to-double(output)
- __ cvtsi2sd(temp, output, true);
- // if input >= temp goto done
- __ comisd(input, temp);
+ // if input >= (double)LONG_MAX goto done
+ __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
__ j(kAboveEqual, &done);
// if input == NaN goto nan
__ j(kUnordered, &nan);
@@ -4339,8 +4321,7 @@
codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- codegen_->Load64BitValue(CpuRegister(TMP), value);
- __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+ codegen_->Store64BitValueToStack(destination, value);
}
} else if (constant->IsFloatConstant()) {
float fp_value = constant->AsFloatConstant()->GetValue();
@@ -4371,8 +4352,7 @@
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- codegen_->Load64BitValue(CpuRegister(TMP), value);
- __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+ codegen_->Store64BitValueToStack(destination, value);
}
}
} else if (source.IsFpuRegister()) {
@@ -4874,6 +4854,18 @@
}
}
+void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
+ DCHECK(dest.IsDoubleStackSlot());
+ if (IsInt<32>(value)) {
+ // Can move directly as an int32 constant.
+ __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
+ Immediate(static_cast<int32_t>(value)));
+ } else {
+ Load64BitValue(CpuRegister(TMP), value);
+ __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
+ }
+}
+
void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
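
Note on Store64BitValueToStack (added above): it emits a single movq-to-memory
with a sign-extended 32-bit immediate when the constant allows it, and only
falls back to materializing the value in TMP otherwise. A minimal standalone
sketch of that size check, assuming nothing beyond standard C++ (this is not
ART's IsInt<32> utility):

    #include <cstdint>
    #include <cstdio>

    // True when the 64-bit value survives a round-trip through int32_t,
    // i.e. it can be encoded as a sign-extended imm32 in a movq store.
    bool FitsSignExtendedImm32(int64_t value) {
      return value == static_cast<int64_t>(static_cast<int32_t>(value));
    }

    int main() {
      std::printf("%d\n", FitsSignExtendedImm32(-1));                   // 1: single movq
      std::printf("%d\n", FitsSignExtendedImm32(INT64_C(0x7FFFFFFF)));  // 1: single movq
      std::printf("%d\n", FitsSignExtendedImm32(INT64_C(0x80000000)));  // 0: goes through TMP
      return 0;
    }
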
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3b3915f..41bebac 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -322,6 +322,9 @@
// Load a 64 bit value into a register in the most efficient manner.
void Load64BitValue(CpuRegister dest, int64_t value);
+ // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
+ void Store64BitValueToStack(Location dest, int64_t value);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 62cdb4c..85d40d7 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -282,8 +282,6 @@
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
- // TODO: Allow x86 to work with memory. This requires assembler support, see below.
- // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
@@ -294,34 +292,18 @@
CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- if (output.IsFpuRegister()) {
- // In-register
- XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK(output.IsFpuRegister());
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- // TODO: Can mask directly with constant area using pand if we can guarantee
- // that the literal is aligned on a 16 byte boundary. This will avoid a
- // temporary.
- if (is64bit) {
- __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- } else {
- __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
- __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- }
+ // TODO: Can mask directly with constant area using pand if we can guarantee
+ // that the literal is aligned on a 16 byte boundary. This will avoid a
+ // temporary.
+ if (is64bit) {
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
+ __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- // TODO: update when assember support is available.
- UNIMPLEMENTED(FATAL) << "Needs assembler support.";
-// Once assembler support is available, in-memory operations look like this:
-// if (is64bit) {
-// DCHECK(output.IsDoubleStackSlot());
-// // No 64b and with literal.
-// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
-// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
-// } else {
-// DCHECK(output.IsStackSlot());
-// // Can use and with a literal directly.
-// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
-// }
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
+ __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
}
@@ -736,6 +718,7 @@
__ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
// if inPlusPointFive >= maxInt goto done
+ __ movl(out, Immediate(kPrimIntMax));
__ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
__ j(kAboveEqual, &done);
@@ -783,6 +766,7 @@
__ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
// if inPlusPointFive >= maxLong goto done
+ __ movq(out, Immediate(kPrimLongMax));
__ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
__ j(kAboveEqual, &done);
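
For the MathAbsFP hunk above, the register-only path that remains computes
abs() by clearing the IEEE-754 sign bit with a mask loaded from the constant
area (movss/andps for floats, movsd/andpd for doubles). A standalone,
illustrative-only C++ sketch of the same bit manipulation:

    #include <cstdint>
    #include <cstring>

    float AbsViaMask(float value) {
      uint32_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      bits &= UINT32_C(0x7FFFFFFF);           // same mask andps applies
      std::memcpy(&value, &bits, sizeof(value));
      return value;
    }

    double AbsViaMask(double value) {
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);   // same mask andpd applies
      std::memcpy(&value, &bits, sizeof(value));
      return value;
    }
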
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index b8e5fb6..beca037 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -332,7 +332,7 @@
void movq(CpuRegister dst, const Address& src);
void movl(CpuRegister dst, const Address& src);
void movq(const Address& dst, CpuRegister src);
- void movq(const Address& dst, const Immediate& src);
+ void movq(const Address& dst, const Immediate& imm);
void movl(const Address& dst, CpuRegister src);
void movl(const Address& dst, const Immediate& imm);
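
The movq(Address, Immediate) overload used by Store64BitValueToStack relies on
the x86-64 immediate encoding rules: 32-bit immediates are sign-extended by
64-bit operations (movq), 32-bit moves zero-extend (movl), and only movabsq
carries a full 64-bit immediate. An illustrative selection sketch under those
assumptions (the exact case split in ART's Load64BitValue may differ):

    #include <cstdint>

    const char* PickLoadConstant(int64_t value) {
      if (value == 0) {
        return "xorl reg, reg";       // shortest, and breaks the dependency on reg
      }
      if (static_cast<uint64_t>(value) <= UINT32_C(0xFFFFFFFF)) {
        return "movl reg, imm32";     // the write zero-extends into the upper half
      }
      if (value == static_cast<int32_t>(value)) {
        return "movq reg, imm32";     // sign-extended 32-bit immediate
      }
      return "movabsq reg, imm64";    // full 10-byte encoding
    }
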