Improve x86 long multiply and shifts
Generate inline code for long shifts by constants and do long
multiplication inline. Convert multiplication by a constant to a
shift when we can. Fix some x86 assembler problems (emit the RMI and
the new reversed-operand RRI formats) and add the new instructions that
were needed (the 64-bit double-width shifts SHLD and SHRD).
Change-Id: I6237a31c36159096e399d40d01eb6bfa22ac2772
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
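
For reference, the inline MUL_LONG lowering below relies on the usual
split of a 64-bit product into 32-bit pieces; a minimal C++ sketch of
that identity (the helper name is illustrative, not part of this patch):

    #include <cstdint>

    // (a_hi:a_lo) * (b_hi:b_lo) mod 2^64
    //   == ((a_hi*b_lo + a_lo*b_hi) << 32) + zext(a_lo) * zext(b_lo)
    static uint64_t MulLongSketch(uint64_t a, uint64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
      uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
      uint32_t cross = a_hi * b_lo + a_lo * b_hi;         // ECX after the two imuls + add
      uint64_t low = static_cast<uint64_t>(a_lo) * b_lo;  // EDX:EAX from the widening mul
      return low + (static_cast<uint64_t>(cross) << 32);  // fold cross terms into EDX
    }

The long shifts by constants are split the same way: amounts below 32
use SHLD/SHRD to move the bits that cross the word boundary, amounts of
32 or more move one word into the other half and fill the vacated half
with zero or the sign word.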
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 3668dc0..32673db 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -192,7 +192,6 @@
MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
MIR* SpecialIdentity(MIR* mir);
LIR* LoadFPConstantValue(int r_dest, int value);
- bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void AssignDataOffsets();
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 71c3492..150794e 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -794,18 +794,6 @@
StoreValueWide(rl_dest, rl_result);
}
-
- /*
- * Check to see if a result pair has a misaligned overlap with an operand pair. This
- * is not usual for dx to generate, but it is legal (for now). In a future rev of
- * dex, we'll want to make this case illegal.
- */
-bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
- DCHECK(rl_src.wide);
- DCHECK(rl_dest.wide);
- return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
-}
-
void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2) {
/*
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 12ecfff..1eb79c9 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1137,4 +1137,28 @@
new_lir->next->prev = new_lir;
}
+bool Mir2Lir::IsPowerOfTwo(uint64_t x) {
+ return (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x'; 'x' must be non-zero.
+int32_t Mir2Lir::LowestSetBit(uint64_t x) {
+ int bit_posn = 0;
+ while ((x & 0xf) == 0) {
+ bit_posn += 4;
+ x >>= 4;
+ }
+ while ((x & 1) == 0) {
+ bit_posn++;
+ x >>= 1;
+ }
+ return bit_posn;
+}
+
+bool Mir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
+ DCHECK(rl_src.wide);
+ DCHECK(rl_dest.wide);
+ return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
+}
+
} // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 1f00b2a..d8b9869 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1426,30 +1426,12 @@
* or produce corresponding Thumb instructions directly.
*/
-static bool IsPowerOfTwo(int x) {
- return (x & (x - 1)) == 0;
-}
-
// Returns true if no more than two bits are set in 'x'.
static bool IsPopCountLE2(unsigned int x) {
x &= x - 1;
return (x & (x - 1)) == 0;
}
-// Returns the index of the lowest set bit in 'x'.
-static int32_t LowestSetBit(uint32_t x) {
- int bit_posn = 0;
- while ((x & 0xf) == 0) {
- bit_posn += 4;
- x >>= 4;
- }
- while ((x & 1) == 0) {
- bit_posn++;
- x >>= 1;
- }
- return bit_posn;
-}
-
// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1741,7 +1723,7 @@
break;
case Instruction::MUL_LONG:
case Instruction::MUL_LONG_2ADDR:
- if (cu_->instruction_set == kThumb2) {
+ if (cu_->instruction_set != kMips) {
GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
return;
} else {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index b59ec5e..b54bc7b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -873,6 +873,29 @@
CompilationUnit* GetCompilationUnit() {
return cu_;
}
+ /*
+ * @brief Returns the index of the lowest set bit in 'x'.
+ * @param x Value to be examined.
+ * @returns The bit number of the lowest bit set in the value; 'x' must be non-zero.
+ */
+ int32_t LowestSetBit(uint64_t x);
+ /*
+ * @brief Is this value a power of two?
+ * @param x Value to be examined.
+ * @returns 'true' if at most one bit is set in the value (note: also true for 0).
+ */
+ bool IsPowerOfTwo(uint64_t x);
+ /*
+ * @brief Do these SRs overlap?
+ * @param rl_op1 One RegLocation
+ * @param rl_op2 The other RegLocation
+ * @return 'true' if the VR pairs overlap
+ *
+ * Check to see if a result pair has a misaligned overlap with an operand pair. This
+ * is not usual for dx to generate, but it is legal (for now). In a future rev of
+ * dex, we'll want to make this case illegal.
+ */
+ bool BadOverlap(RegLocation rl_op1, RegLocation rl_op2);
/*
* @brief Force a location (in a register) into a temporary register
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 5e1c4d1..6d092d6 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -211,6 +211,8 @@
#undef SHIFT_ENCODING_MAP
{ kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
+ { kX86Shld32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
+ { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
{ kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
{ kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -422,6 +424,7 @@
case kThreadImm: // lir operands - 0: disp, 1: imm
return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm
+ case kRegRegImmRev:
return ComputeSize(entry, 0, 0, false);
case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm
return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
@@ -642,7 +645,6 @@
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
- DCHECK_NE(rX86_SP, base);
EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -755,6 +757,22 @@
EmitImm(entry, imm);
}
+void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
+ uint8_t reg1, uint8_t reg2, int32_t imm) {
+ EmitRegRegImm(entry, reg2, reg1, imm);
+}
+
+void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
+ uint8_t reg, uint8_t base, int disp, int32_t imm) {
+ EmitPrefixAndOpcode(entry);
+ DCHECK(!X86_FPREG(reg));
+ DCHECK_LT(reg, 8);
+ EmitModrmDisp(reg, base, disp);
+ DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+ DCHECK_EQ(0, entry->skeleton.ax_opcode);
+ EmitImm(entry, imm);
+}
+
void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
if (entry->skeleton.prefix1 != 0) {
code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1186,9 +1204,16 @@
case kRegRegStore: // lir operands - 0: reg2, 1: reg1
EmitRegReg(entry, lir->operands[1], lir->operands[0]);
break;
+ case kRegRegImmRev:
+ EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+ break;
case kRegRegImm:
EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
+ case kRegMemImm:
+ EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+ lir->operands[3]);
+ break;
case kRegImm: // lir operands - 0: reg, 1: immediate
EmitRegImm(entry, lir->operands[0], lir->operands[1]);
break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 9cc4efd..6280b64 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -245,6 +245,8 @@
void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+ void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+ void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, int32_t imm);
void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
@@ -337,6 +339,32 @@
* @param is_div 'true' if this is a division, 'false' for a remainder.
*/
RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+
+ /*
+ * Generate code to implement long shift operations.
+ * @param opcode The DEX opcode to specify the shift type.
+ * @param rl_dest The destination.
+ * @param rl_src The value to be shifted.
+ * @param shift_amount How much to shift.
+ * @returns the RegLocation of the result.
+ */
+ RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src, int shift_amount);
+ /*
+ * Generate an imul of a register by a constant or a better sequence.
+ * @param dest Destination Register.
+ * @param src Source Register.
+ * @param val Constant multiplier.
+ */
+ void GenImulRegImm(int dest, int src, int val);
+ /*
+ * Generate an imul of a memory location by a constant or a better sequence.
+ * @param dest Destination Register.
+ * @param sreg Symbolic register.
+ * @param displacement Displacement on the stack of the symbolic register.
+ * @param val Constant multiplier.
+ */
+ void GenImulMemImm(int dest, int sreg, int displacement, int val);
};
} // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ccae130..fc60eb9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -755,9 +755,188 @@
return NULL;
}
+void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) {
+ switch (val) {
+ case 0:
+ NewLIR2(kX86Xor32RR, dest, dest);
+ break;
+ case 1:
+ OpRegCopy(dest, src);
+ break;
+ default:
+ OpRegRegImm(kOpMul, dest, src, val);
+ break;
+ }
+}
+
+void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) {
+ LIR *m;
+ switch (val) {
+ case 0:
+ NewLIR2(kX86Xor32RR, dest, dest);
+ break;
+ case 1:
+ LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg);
+ break;
+ default:
+ m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP,
+ displacement, val);
+ AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
+ break;
+ }
+}
+
void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenX86Long for x86";
+ if (rl_src1.is_const) {
+ std::swap(rl_src1, rl_src2);
+ }
+ // Are we multiplying by a constant?
+ if (rl_src2.is_const) {
+ // Do special handling for multiplication by a simple constant operand.
+ int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+ if (val == 0) {
+ RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+ OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+ OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg);
+ StoreValueWide(rl_dest, rl_result);
+ return;
+ } else if (val == 1) {
+ rl_src1 = EvalLocWide(rl_src1, kCoreReg, true);
+ StoreValueWide(rl_dest, rl_src1);
+ return;
+ } else if (val == 2) {
+ GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+ return;
+ } else if (IsPowerOfTwo(val)) {
+ int shift_amount = LowestSetBit(val);
+ if (!BadOverlap(rl_src1, rl_dest)) {
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+ RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
+ rl_src1, shift_amount);
+ StoreValueWide(rl_dest, rl_result);
+ return;
+ }
+ }
+
+ // Okay, just bite the bullet and do it.
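+ // For a general constant we use the same split as the full 64x64 multiply:
+ //   (src1_hi:src1_lo) * (val_hi:val_lo) mod 2^64
+ //     == ((src1_hi*val_lo + src1_lo*val_hi) << 32) + widen(src1_lo * val_lo)
+ // ECX collects the two cross terms; EDX:EAX holds the widening low product.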
+ int32_t val_lo = Low32Bits(val);
+ int32_t val_hi = High32Bits(val);
+ FlushAllRegs();
+ LockCallTemps(); // Prepare for explicit register usage.
+ rl_src1 = UpdateLocWide(rl_src1);
+ bool src1_in_reg = rl_src1.location == kLocPhysReg;
+ int displacement = SRegOffset(rl_src1.s_reg_low);
+
+ // ECX <- 1H * 2L
+ // EAX <- 1L * 2H
+ if (src1_in_reg) {
+ GenImulRegImm(r1, rl_src1.high_reg, val_lo);
+ GenImulRegImm(r0, rl_src1.low_reg, val_hi);
+ } else {
+ GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
+ GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
+ }
+
+ // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
+ NewLIR2(kX86Add32RR, r1, r0);
+
+ // EAX <- 2L
+ LoadConstantNoClobber(r0, val_lo);
+
+ // EDX:EAX <- 2L * 1L (double precision)
+ if (src1_in_reg) {
+ NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+ } else {
+ LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+ AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+ true /* is_load */, true /* is_64bit */);
+ }
+
+ // EDX <- EDX + ECX (add high words)
+ NewLIR2(kX86Add32RR, r2, r1);
+
+ // Result is EDX:EAX
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+ INVALID_SREG, INVALID_SREG};
+ StoreValueWide(rl_dest, rl_result);
+ return;
+ }
+
+ // Nope. Do it the hard way
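+ // Same cross-term decomposition as the constant case above, except both
+ // halves of src2 now come from a register or the stack.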
+ FlushAllRegs();
+ LockCallTemps(); // Prepare for explicit register usage.
+ rl_src1 = UpdateLocWide(rl_src1);
+ rl_src2 = UpdateLocWide(rl_src2);
+
+ // At this point, the VRs are in their home locations.
+ bool src1_in_reg = rl_src1.location == kLocPhysReg;
+ bool src2_in_reg = rl_src2.location == kLocPhysReg;
+
+ // ECX <- 1H
+ if (src1_in_reg) {
+ NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg);
+ } else {
+ LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1,
+ kWord, GetSRegHi(rl_src1.s_reg_low));
+ }
+
+ // EAX <- 2H
+ if (src2_in_reg) {
+ NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg);
+ } else {
+ LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0,
+ kWord, GetSRegHi(rl_src2.s_reg_low));
+ }
+
+ // EAX <- EAX * 1L (2H * 1L)
+ if (src1_in_reg) {
+ NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg);
+ } else {
+ int displacement = SRegOffset(rl_src1.s_reg_low);
+ LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
+ AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+ true /* is_load */, true /* is_64bit */);
+ }
+
+ // ECX <- ECX * 2L (1H * 2L)
+ if (src2_in_reg) {
+ NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
+ } else {
+ int displacement = SRegOffset(rl_src2.s_reg_low);
+ LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+ AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+ true /* is_load */, true /* is_64bit */);
+ }
+
+ // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
+ NewLIR2(kX86Add32RR, r1, r0);
+
+ // EAX <- 2L
+ if (src2_in_reg) {
+ NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg);
+ } else {
+ LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0,
+ kWord, rl_src2.s_reg_low);
+ }
+
+ // EDX:EAX <- 2L * 1L (double precision)
+ if (src1_in_reg) {
+ NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+ } else {
+ int displacement = SRegOffset(rl_src1.s_reg_low);
+ LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+ AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+ true /* is_load */, true /* is_64bit */);
+ }
+
+ // EDX <- EDX + ECX (add high words)
+ NewLIR2(kX86Add32RR, r2, r1);
+
+ // Result is EDX:EAX
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+ INVALID_SREG, INVALID_SREG};
+ StoreValueWide(rl_dest, rl_result);
}
void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
@@ -1057,10 +1236,89 @@
}
}
+RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src, int shift_amount) {
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
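+ // Split the 64-bit shift into 32-bit operations: for amounts below 32 the
+ // bits crossing the word boundary move via SHLD/SHRD and the remaining word
+ // is shifted by the same amount; for 32 and above, one source word shifts
+ // entirely into the other half and the vacated half is filled with zero
+ // (SHL/USHR) or with the sign word (SHR).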
+ switch (opcode) {
+ case Instruction::SHL_LONG:
+ case Instruction::SHL_LONG_2ADDR:
+ DCHECK_NE(shift_amount, 1); // Prevent a double store from happening.
+ if (shift_amount == 32) {
+ OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+ LoadConstant(rl_result.low_reg, 0);
+ } else if (shift_amount > 31) {
+ OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+ FreeTemp(rl_src.high_reg);
+ NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32);
+ LoadConstant(rl_result.low_reg, 0);
+ } else {
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount);
+ NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount);
+ }
+ break;
+ case Instruction::SHR_LONG:
+ case Instruction::SHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+ } else if (shift_amount > 31) {
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+ } else {
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount);
+ }
+ break;
+ case Instruction::USHR_LONG:
+ case Instruction::USHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ LoadConstant(rl_result.high_reg, 0);
+ } else if (shift_amount > 31) {
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32);
+ LoadConstant(rl_result.high_reg, 0);
+ } else {
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+ NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected case";
+ }
+ return rl_result;
+}
+
void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
- RegLocation rl_src1, RegLocation rl_shift) {
- // Default implementation is just to ignore the constant case.
- GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+ RegLocation rl_src, RegLocation rl_shift) {
+ // Per spec, we only care about low 6 bits of shift amount.
+ int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+ if (shift_amount == 0) {
+ rl_src = LoadValueWide(rl_src, kCoreReg);
+ StoreValueWide(rl_dest, rl_src);
+ return;
+ } else if (shift_amount == 1 &&
+ (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
+ // Need to handle this here to avoid calling StoreValueWide twice.
+ GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
+ return;
+ }
+ if (BadOverlap(rl_src, rl_dest)) {
+ GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+ return;
+ }
+ rl_src = LoadValueWide(rl_src, kCoreReg);
+ RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
+ StoreValueWide(rl_dest, rl_result);
}
void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d7f61fc..3a12038 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -304,6 +304,8 @@
BinaryShiftOpCode(kX86Sar),
#undef BinaryShiftOpcode
kX86Cmc,
+ kX86Shld32RRI,
+ kX86Shrd32RRI,
#define UnaryOpcode(opcode, reg, mem, array) \
opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -398,6 +400,7 @@
kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds.
kMovRegImm, // Shorter form move RI.
+ kRegRegImmRev, // RRI with the first reg in r/m.
kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate.
kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL.
kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds.