Optimize x86 long arithmetic
Be smarter about taking advantage of a constant operand for x86 long
add/sub/and/or/xor. Using instructions with immediates and generating
results directly into memory reduces the number of temporary registers
and avoids hardcoded register usage.
Also rewrite the existing non-const x86 arithmetic to avoid fixed
register use, and exploit the two-operand form of x86 instructions.
Pass the opcode to the XXXLong() routines to easily detect two-operand
DEX opcodes.
Add a new StoreFinalValueWide() routine, which is similar to
StoreValueWide() but doesn't do an EvalLoc to allocate registers. The src
operand must already be in registers; the routine just updates the dest
location and calls the right live/dirty routines to get the src into the
dest properly.
Change-Id: Iefc16e7bc2236a73dc780d3d5137ae8343171f62
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5c993c5..27cec8d2 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -679,31 +679,24 @@
}
DCHECK_NE(loc.s_reg_low, INVALID_SREG);
- if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
- // Need a wide vector register.
- low_reg = AllocTypedTemp(true, reg_class);
- loc.low_reg = low_reg;
- loc.high_reg = low_reg; // Play nice with existing code.
- loc.vec_len = kVectorLength8;
- if (update) {
- loc.location = kLocPhysReg;
- MarkLive(loc.low_reg, loc.s_reg_low);
- }
+ DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+ new_regs = AllocTypedTempPair(loc.fp, reg_class);
+ loc.low_reg = new_regs & 0xff;
+ loc.high_reg = (new_regs >> 8) & 0xff;
+
+ if (loc.low_reg == loc.high_reg) {
DCHECK(IsFpReg(loc.low_reg));
+ loc.vec_len = kVectorLength8;
} else {
- DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
- new_regs = AllocTypedTempPair(loc.fp, reg_class);
- loc.low_reg = new_regs & 0xff;
- loc.high_reg = (new_regs >> 8) & 0xff;
-
MarkPair(loc.low_reg, loc.high_reg);
- if (update) {
- loc.location = kLocPhysReg;
- MarkLive(loc.low_reg, loc.s_reg_low);
+ }
+ if (update) {
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ if (loc.low_reg != loc.high_reg) {
MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
}
- DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
}
return loc;
}
@@ -796,4 +789,23 @@
// Just use the standard code to do the generation.
Mir2Lir::GenConstWide(rl_dest, value);
}
+
+// Debug aid: logs the fields of loc at INFO level. TODO: Merge with dumper in vreg_analysis.cc
+void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
+ LOG(INFO) << "location: " << loc.location << ','
+ << (loc.wide ? " w" : " ")
+ << (loc.defined ? " D" : " ")
+ << (loc.is_const ? " c" : " ")
+ << (loc.fp ? " F" : " ")
+ << (loc.core ? " C" : " ")
+ << (loc.ref ? " r" : " ")
+ << (loc.high_word ? " h" : " ")
+ << (loc.home ? " H" : " ")
+ << " vec_len: " << loc.vec_len
+ << ", low: " << static_cast<int>(loc.low_reg)
+ << ", high: " << static_cast<int>(loc.high_reg)
+ << ", s_reg: " << loc.s_reg_low
+ << ", orig: " << loc.orig_sreg;
+}
+
} // namespace art