ART: Inline implementation of min() and max() for long in x86
Change-Id: I49de0ee10f79ce49245d6a6c2cd06c5a46b45dca
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index afa2ae2..7ba9570 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -793,8 +793,115 @@
bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
- if (is_long && cu_->instruction_set == kX86) {
- return false;
+ if (is_long && !cu_->target64) {
+ /*
+ * We want to implement the following algorithm
+ * mov eax, low part of arg1
+ * mov edx, high part of arg1
+ * mov ebx, low part of arg2
+ * mov ecx, high part of arg2
+ * mov edi, eax
+ * sub edi, ebx
+ * mov edi, edx
+ * sbb edi, ecx
+ * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx"
+ * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx"
+ *
+ * The algorithm above needs 5 registers: a pair for the first operand
+ * (which later will be used as result), a pair for the second operand
+ * and a temp register (e.g. 'edi') for intermediate calculations.
+ * Ideally we have 6 GP caller-save registers in 32-bit mode. They are:
+ * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be
+ * always enough registers to operate on. Practically, there is a pair
+ * of registers 'edi' and 'esi' which holds promoted values and
+ * sometimes should be treated as 'callee save'. If one of the operands
+ * is in the promoted registers then we have enough register to
+ * operate on. Otherwise there is lack of resources and we have to
+ * save 'edi' before calculations and restore after.
+ */
+
+ RegLocation rl_src1 = info->args[0];
+ RegLocation rl_src2 = info->args[2];
+ RegLocation rl_dest = InlineTargetWide(info);
+ int res_vreg, src1_vreg, src2_vreg;
+
+ /*
+ * If the result register is the same as the second element, then we
+ * need to be careful. The reason is that the first copy will
+ * inadvertently clobber the second element with the first one thus
+ * yielding the wrong result. Thus we do a swap in that case.
+ */
+ res_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+ src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low);
+ if (res_vreg == src2_vreg) {
+ std::swap(rl_src1, rl_src2);
+ }
+
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+ // Pick the first integer as min/max.
+ OpRegCopyWide(rl_result.reg, rl_src1.reg);
+
+ /*
+ * If the integers are both in the same register, then there is
+ * nothing else to do because they are equal and we have already
+ * moved one into the result.
+ */
+ src1_vreg = mir_graph_->SRegToVReg(rl_src1.s_reg_low);
+ src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low);
+ if (src1_vreg == src2_vreg) {
+ StoreValueWide(rl_dest, rl_result);
+ return true;
+ }
+
+ // Free registers to make some room for the second operand.
+ // But don't try to free ourselves or promoted registers.
+ if (res_vreg != src1_vreg &&
+ IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
+ FreeTemp(rl_src1.reg);
+ }
+ rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+
+ // Do we have a free register for intermediate calculations?
+ RegStorage tmp = AllocTemp(false);
+ if (tmp == RegStorage::InvalidReg()) {
+ /*
+ * No, will use 'edi'.
+ *
+ * As mentioned above we have 4 temporary and 2 promotable
+ * caller-save registers. Therefore, we assume that a free
+ * register can be allocated only if 'esi' and 'edi' are
+ * already used as operands. If number of promotable registers
+ * increases from 2 to 4 then our assumption fails and operand
+ * data is corrupted.
+ * Let's DCHECK it.
+ */
+ DCHECK(IsTemp(rl_src2.reg.GetLow()) &&
+ IsTemp(rl_src2.reg.GetHigh()) &&
+ IsTemp(rl_result.reg.GetLow()) &&
+ IsTemp(rl_result.reg.GetHigh()));
+ tmp = rs_rDI;
+ NewLIR1(kX86Push32R, tmp.GetReg());
+ }
+
+ // Now we are ready to do calculations.
+ OpRegReg(kOpMov, tmp, rl_result.reg.GetLow());
+ OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow());
+ OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh());
+ OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh());
+
+ // Let's put pop 'edi' here to break a bit the dependency chain.
+ if (tmp == rs_rDI) {
+ NewLIR1(kX86Pop32R, tmp.GetReg());
+ }
+
+ // Conditionally move the other integer into the destination register.
+ ConditionCode cc = is_min ? kCondGe : kCondLt;
+ OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
+ OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
+ StoreValueWide(rl_dest, rl_result);
+ return true;
}
// Get the two arguments to the invoke and place them in GP registers.