Merge "Optimizing: Use more X86 3 operand multiplies"
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 676b842..0569565 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2348,7 +2348,12 @@
case Primitive::kPrimInt:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
- locations->SetOut(Location::SameAsFirstInput());
+ if (mul->InputAt(1)->IsIntConstant()) {
+ // Can use 3 operand multiply.
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
break;
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
@@ -2376,21 +2381,24 @@
LocationSummary* locations = mul->GetLocations();
Location first = locations->InAt(0);
Location second = locations->InAt(1);
- DCHECK(first.Equals(locations->Out()));
+ Location out = locations->Out();
switch (mul->GetResultType()) {
- case Primitive::kPrimInt: {
- if (second.IsRegister()) {
+ case Primitive::kPrimInt:
+ // The constant may have ended up in a register, so test explicitly to avoid
+ // problems where the output may not be the same as the first operand.
+ if (mul->InputAt(1)->IsIntConstant()) {
+ Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+ __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
+ } else if (second.IsRegister()) {
+ DCHECK(first.Equals(out));
__ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
- } else if (second.IsConstant()) {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
- __ imull(first.AsRegister<Register>(), imm);
} else {
DCHECK(second.IsStackSlot());
+ DCHECK(first.Equals(out));
__ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
}
break;
- }
case Primitive::kPrimLong: {
Register in1_hi = first.AsRegisterPairHigh<Register>();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a95ce68..287737b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2535,13 +2535,19 @@
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
- locations->SetOut(Location::SameAsFirstInput());
+ if (mul->InputAt(1)->IsIntConstant()) {
+ // Can use 3 operand multiply.
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
- if (locations->InAt(1).IsConstant()) {
+ locations->SetInAt(1, Location::Any());
+ if (mul->InputAt(1)->IsLongConstant() &&
+ IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
// Can use 3 operand multiply.
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
} else {
@@ -2566,37 +2572,51 @@
LocationSummary* locations = mul->GetLocations();
Location first = locations->InAt(0);
Location second = locations->InAt(1);
+ Location out = locations->Out();
switch (mul->GetResultType()) {
- case Primitive::kPrimInt: {
- DCHECK(first.Equals(locations->Out()));
- if (second.IsRegister()) {
+ case Primitive::kPrimInt:
+ // The constant may have ended up in a register, so test explicitly to avoid
+ // problems where the output may not be the same as the first operand.
+ if (mul->InputAt(1)->IsIntConstant()) {
+ Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+ __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
+ } else if (second.IsRegister()) {
+ DCHECK(first.Equals(out));
__ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
- } else if (second.IsConstant()) {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
- __ imull(first.AsRegister<CpuRegister>(), imm);
} else {
+ DCHECK(first.Equals(out));
DCHECK(second.IsStackSlot());
__ imull(first.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), second.GetStackIndex()));
}
break;
- }
case Primitive::kPrimLong: {
- if (second.IsConstant()) {
- int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
- DCHECK(IsInt<32>(value));
- __ imulq(locations->Out().AsRegister<CpuRegister>(),
- first.AsRegister<CpuRegister>(),
- Immediate(static_cast<int32_t>(value)));
- } else {
- DCHECK(first.Equals(locations->Out()));
+ // The constant may have ended up in a register, so test explicitly to avoid
+ // problems where the output may not be the same as the first operand.
+ if (mul->InputAt(1)->IsLongConstant()) {
+ int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
+ if (IsInt<32>(value)) {
+ __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
+ Immediate(static_cast<int32_t>(value)));
+ } else {
+ // Have to use the constant area.
+ DCHECK(first.Equals(out));
+ __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+ }
+ } else if (second.IsRegister()) {
+ DCHECK(first.Equals(out));
__ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ DCHECK(first.Equals(out));
+ __ imulq(first.AsRegister<CpuRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
}
break;
}
case Primitive::kPrimFloat: {
- DCHECK(first.Equals(locations->Out()));
+ DCHECK(first.Equals(out));
if (second.IsFpuRegister()) {
__ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
@@ -2611,7 +2631,7 @@
}
case Primitive::kPrimDouble: {
- DCHECK(first.Equals(locations->Out()));
+ DCHECK(first.Equals(out));
if (second.IsFpuRegister()) {
__ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index a614193..60b9f0d 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1194,11 +1194,26 @@
}
-void X86Assembler::imull(Register reg, const Immediate& imm) {
+void X86Assembler::imull(Register dst, Register src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x69);
- EmitOperand(reg, Operand(reg));
- EmitImmediate(imm);
+ // See whether imm can be represented as a sign-extended 8bit value.
+ int32_t v32 = static_cast<int32_t>(imm.value());
+ if (IsInt<8>(v32)) {
+ // Sign-extension works.
+ EmitUint8(0x6B);
+ EmitOperand(dst, Operand(src));
+ EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
+ } else {
+ // Not representable, use full immediate.
+ EmitUint8(0x69);
+ EmitOperand(dst, Operand(src));
+ EmitImmediate(imm);
+ }
+}
+
+
+void X86Assembler::imull(Register reg, const Immediate& imm) {
+ imull(reg, reg, imm);
}
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index ae8d7a1..fb4c83a 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -409,6 +409,7 @@
void imull(Register dst, Register src);
void imull(Register reg, const Immediate& imm);
+ void imull(Register dst, Register src, const Immediate& imm);
void imull(Register reg, const Address& address);
void imull(Register reg);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1dd4a2e..3ff4b35 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1672,28 +1672,33 @@
EmitOperand(dst.LowBits(), Operand(src));
}
-void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK(imm.is_int32()); // imull only supports 32b immediate.
- EmitOptionalRex32(reg, reg);
+ EmitOptionalRex32(dst, src);
// See whether imm can be represented as a sign-extended 8bit value.
int32_t v32 = static_cast<int32_t>(imm.value());
if (IsInt<8>(v32)) {
// Sign-extension works.
EmitUint8(0x6B);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(src));
EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
} else {
// Not representable, use full immediate.
EmitUint8(0x69);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(src));
EmitImmediate(imm);
}
}
+void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+ imull(reg, reg, imm);
+}
+
+
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 89a5606..202d989 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -539,6 +539,7 @@
void imull(CpuRegister dst, CpuRegister src);
void imull(CpuRegister reg, const Immediate& imm);
+ void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
void imull(CpuRegister reg, const Address& address);
void imulq(CpuRegister src);