Merge "[optimizing] Tune some x86_64 moves"
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9d2fc43..e633970 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -99,7 +99,7 @@
if (is_div_) {
__ negq(cpu_reg_);
} else {
- __ movq(cpu_reg_, Immediate(0));
+ __ xorl(cpu_reg_, cpu_reg_);
}
}
__ jmp(GetExitLabel());
@@ -675,7 +675,7 @@
DCHECK(constant->IsLongConstant());
value = constant->AsLongConstant()->GetValue();
}
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(source.IsDoubleStackSlot());
@@ -708,9 +708,9 @@
} else if (const_to_move->IsLongConstant()) {
int64_t value = const_to_move->AsLongConstant()->GetValue();
if (location.IsRegister()) {
- __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
+ Load64BitValue(location.AsRegister<CpuRegister>(), value);
} else if (location.IsDoubleStackSlot()) {
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(location.IsConstant());
@@ -959,7 +959,7 @@
LocationSummary* locations = comp->GetLocations();
CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
// Clear register: setcc only sets the low byte.
- __ xorq(reg, reg);
+ __ xorl(reg, reg);
Location lhs = locations->InAt(0);
Location rhs = locations->InAt(1);
if (rhs.IsRegister()) {
@@ -1422,8 +1422,8 @@
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
// Set the hidden argument.
- __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
- Immediate(invoke->GetDexMethodIndex()));
+ CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
@@ -1859,7 +1859,7 @@
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-float(output)
__ cvtsi2ss(temp, output, true);
// if input >= temp goto done
@@ -1872,7 +1872,7 @@
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
@@ -1884,7 +1884,7 @@
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-double(output)
__ cvtsi2sd(temp, output, true);
// if input >= temp goto done
@@ -1897,7 +1897,7 @@
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
@@ -2486,7 +2486,7 @@
case Primitive::kPrimLong: {
if (instruction->IsRem()) {
- __ xorq(output_register, output_register);
+ __ xorl(output_register, output_register);
} else {
__ movq(output_register, input_register);
if (imm == -1) {
@@ -2530,7 +2530,7 @@
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
- __ movq(rdx, Immediate(std::abs(imm) - 1));
+ codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
__ addq(rdx, numerator);
__ testq(numerator, numerator);
__ cmov(kGreaterEqual, rdx, numerator);
@@ -2627,7 +2627,7 @@
__ movq(numerator, rax);
// RAX = magic
- __ movq(rax, Immediate(magic));
+ codegen_->Load64BitValue(rax, magic);
// RDX:RAX = magic * numerator
__ imulq(numerator);
@@ -2656,8 +2656,7 @@
if (IsInt<32>(imm)) {
__ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
} else {
- __ movq(numerator, Immediate(imm));
- __ imulq(rdx, numerator);
+ __ imulq(rdx, codegen_->LiteralInt64Address(imm));
}
__ subq(rax, rdx);
@@ -3023,8 +3022,8 @@
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
-
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3045,7 +3044,8 @@
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3952,45 +3952,42 @@
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
if (destination.IsRegister()) {
- __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
+ codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), Immediate(value));
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
} else if (constant->IsFloatConstant()) {
float fp_value = constant->AsFloatConstant()->GetValue();
int32_t value = bit_cast<int32_t, float>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
// easy FP 0.0.
__ xorps(dest, dest);
} else {
- __ movl(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
+ Immediate imm(value);
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else {
DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
double fp_value = constant->AsDoubleConstant()->GetValue();
int64_t value = bit_cast<int64_t, double>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
__ xorpd(dest, dest);
} else {
- __ movq(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), imm);
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
}
@@ -4449,6 +4446,17 @@
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+ if (value == 0) {
+ __ xorl(dest, dest);
+ } else if (value > 0 && IsInt<32>(value)) {
+ // movl zero-extends into the full register and encodes shorter (no REX.W, no ModRM).
+ __ movl(dest, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ movq(dest, Immediate(value));
+ }
+}
+
void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 13f9c46..480ea6b 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -282,6 +282,9 @@
Address LiteralInt32Address(int32_t v);
Address LiteralInt64Address(int64_t v);
+ // Load a 64-bit value into a register in the most efficient manner.
+ void Load64BitValue(CpuRegister dest, int64_t value);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1fc5432..3dbcd4c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -783,7 +783,7 @@
__ Bind(&nan);
// output = 0
- __ xorq(out, out);
+ __ xorl(out, out);
__ Bind(&done);
}
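
For readers without an ART tree handy, the dispatch inside the new
Load64BitValue() can be exercised standalone. The sketch below is
illustrative, not ART code: FitsInt32() stands in for ART's IsInt<32>()
template, PickLoad() is a made-up name, and the byte counts assume
rax/eax (r8-r15 add a REX.B prefix to the short forms).

// load64_demo.cc -- standalone sketch of the Load64BitValue() dispatch.
#include <cstdint>
#include <cstdio>

// Stand-in for ART's IsInt<32>(value): true when the value is
// representable as a sign-extended 32-bit immediate.
static bool FitsInt32(int64_t value) {
  return value >= INT32_MIN && value <= INT32_MAX;
}

// Mirrors CodeGeneratorX86_64::Load64BitValue(): pick the shortest
// instruction that leaves `dest` holding the 64-bit value.
static const char* PickLoad(int64_t value) {
  if (value == 0) {
    // Writes to a 32-bit register zero-extend, so xorl clears all 64 bits.
    return "xorl dest, dest          (2 bytes)";
  }
  if (value > 0 && FitsInt32(value)) {
    // Positive and fits in 32 bits: movl zero-extends, no REX.W needed.
    return "movl dest, imm32         (5 bytes)";
  }
  // Negative int32 values take movq's sign-extended imm32 form; anything
  // wider needs the full imm64 encoding.
  return FitsInt32(value) ? "movq dest, imm32 (sext)  (7 bytes)"
                          : "movq dest, imm64         (10 bytes)";
}

int main() {
  const int64_t samples[] = {0, 5, -1, INT64_C(1) << 31, INT64_C(1) << 40};
  for (int64_t v : samples) {
    std::printf("%14lld -> %s\n", static_cast<long long>(v), PickLoad(v));
  }
  return 0;
}

The value > 0 guard is the one subtlety: a negative value that fits in
32 bits must not take the movl path, since zero-extension would destroy
the sign bits; movq's sign-extended imm32 form covers it in 7 bytes.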