Improve x86 Fused long compare to literal

Generate better x86 code for the fused long comparison/branch
if one of the arguments is a literal. Use the algorithm from ARM,
tweaked for x86.

Change-Id: I872ba5dfaeeaaba6beff756d2eb6f9c6d018ce3e
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 484d0cc..22c4452 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -209,6 +209,8 @@
                    int scale, int table_or_disp);
     void EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset);
     void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
+    void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                  int64_t val, ConditionCode ccode);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 8ff9ded..56cf7e9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -183,11 +183,23 @@
   LIR* taken = &block_label_list_[bb->taken];
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    // Do special compare/branch against simple const operand
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
+    return;
+  }
+
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
   LoadValueDirectWideFixed(rl_src1, r0, r1);
   LoadValueDirectWideFixed(rl_src2, r2, r3);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
   // Swap operands and condition code to prevent use of zero flag.
   if (ccode == kCondLe || ccode == kCondGt) {
     // Compute (r3:r2) = (r3:r2) - (r1:r0)
@@ -218,6 +230,56 @@
   OpCondBranch(ccode, taken);
 }
 
+void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
+                                          int64_t val, ConditionCode ccode) {
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  LIR* taken = &block_label_list_[bb->taken];
+  LIR* not_taken = &block_label_list_[bb->fall_through];
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  int32_t low_reg = rl_src1.low_reg;
+  int32_t high_reg = rl_src1.high_reg;
+
+  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
+    int t_reg = AllocTemp();
+    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+    FreeTemp(t_reg);
+    OpCondBranch(ccode, taken);
+    return;
+  }
+
+  OpRegImm(kOpCmp, high_reg, val_hi);
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
+      break;
+    case kCondLt:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondUlt;
+      break;
+    case kCondLe:
+      OpCondBranch(kCondLt, taken);
+      OpCondBranch(kCondGt, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCondBranch(kCondGt, taken);
+      OpCondBranch(kCondLt, not_taken);
+      ccode = kCondUge;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+}
+
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo,
                                      int lit, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";