ART: Reduce LockCallTemps usage. Using FlushAllRegs/LockCallTemps in integer arithmetic causes excessive register flushing and clobbering. This patch adds an API that flushes, clobbers and locks only the registers that are actually needed for the calculation. Change-Id: Idabaa4fff4d18a33e5040a80f66f2df6432f8be0 Signed-off-by: Max Kazantsev <maxim.kazantsev@intel.com>

commit: 6dccdc2511c9f22d3cc2ea83386ce9db2688fa19 [log] [tgz]
author: Maxim Kazantsev <maxim.kazantsev@intel.com> Mon Aug 18 18:43:55 2014 +0700
committer: Ian Rogers <irogers@google.com> Thu Sep 04 08:47:13 2014 -0700
tree: a6ec44c6e2351826962cd3d08560e64fed2c218b
parent: 2fb54e875697126de075ae736fd6cb136b7b4f94 [diff]
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index a85e02f..dd4d661 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h

@@ -22,6 +22,7 @@
 #include "x86_lir.h"
 
 #include <map>
+#include <vector>
 
 namespace art {
 
@@ -61,6 +62,15 @@
     bool initialized_;
   };
 
+  class ExplicitTempRegisterLock {  // Locks temps on construction, frees them on destruction.
+  public:
+    ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...);  // Varargs: RegStorage*.
+    ~ExplicitTempRegisterLock();  // Runs at end of statement if the object is unnamed.
+  protected:
+    std::vector<RegStorage> temp_regs_;  // Registers passed to FreeTemp by the destructor.
+    X86Mir2Lir* const mir_to_lir_;  // Target of the FlushReg/Clobber/LockTemp/FreeTemp calls.
+  };
+
  public:
   X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 

diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 196155e..a745339 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc

@@ -49,8 +49,8 @@
     return;
   }
 
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
+  // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+  ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
   RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
   LoadValueDirectWideFixed(rl_src1, r_tmp1);
@@ -398,8 +398,8 @@
     return;
   }
 
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
+  // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+  ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
   RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
   LoadValueDirectWideFixed(rl_src1, r_tmp1);
@@ -768,8 +768,9 @@
 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                   RegLocation rl_src2, bool is_div, bool check_zero) {
   // We have to use fixed registers, so flush all the temps.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+  ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
 
   // Load LHS into EAX.
   LoadValueDirectFixed(rl_src1, rs_r0);
@@ -791,11 +792,11 @@
 
   // Have to catch 0x80000000/-1 case, or we will get an exception!
   OpRegImm(kOpCmp, rs_r1, -1);
-  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   // RHS is -1.
   OpRegImm(kOpCmp, rs_r0, 0x80000000);
-  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   branch->target = NewLIR0(kPseudoTargetLabel);
 
@@ -1606,8 +1607,8 @@
   if (!cu_->target64) {
     int32_t val_lo = Low32Bits(val);
     int32_t val_hi = High32Bits(val);
-    FlushAllRegs();
-    LockCallTemps();  // Prepare for explicit register usage.
+    // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+    ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
     rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
     bool src1_in_reg = rl_src1.location == kLocPhysReg;
     int displacement = SRegOffset(rl_src1.s_reg_low);
@@ -1690,8 +1691,8 @@
   bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
                    mir_graph_->SRegToVReg(rl_src2.s_reg_low);
 
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage.
+  // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+  ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
   rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
   rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
@@ -1714,7 +1715,7 @@
       NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
     } else {
       int displacement = SRegOffset(rl_src2.s_reg_low);
-      LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
+      LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
                        displacement + LOWORD_OFFSET);
       AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                               true /* is_load */, true /* is_64bit */);
@@ -2154,8 +2155,8 @@
   }
 
   // We have to use fixed registers, so flush all the temps.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage.
+  // Prepare for explicit register usage; the named lock holds the registers until scope exit.
+  ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
 
   // Load LHS into RAX.
   LoadValueDirectWideFixed(rl_src1, rs_r0q);
@@ -2171,7 +2172,7 @@
 
   // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
-  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+  LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
 
   // RHS is -1.
   LoadConstantWide(rs_r6q, 0x8000000000000000);

diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ffe6702..aadb41a 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc

@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
-#include <string>
+#include <cstdarg>
 #include <inttypes.h>
+#include <string>
 
 #include "backend_x86.h"
 #include "codegen_x86.h"
@@ -2919,4 +2920,46 @@
   return true;
 }
 
+/**
+ * Flush, clobber and lock the given temp registers for explicit usage. The variadic
+ * arguments are n_regs pointers to RegStorage. Registers will be freed in destructor.
+ */
+X86Mir2Lir::ExplicitTempRegisterLock::ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir,
+                                                               int n_regs, ...) :
+    temp_regs_(),  // Starts empty; sizing it to n_regs would add default entries to free.
+    mir_to_lir_(mir_to_lir) {
+  va_list regs;
+  va_start(regs, n_regs);
+  for (int i = 0; i < n_regs; i++) {
+    RegStorage reg = *(va_arg(regs, RegStorage*));
+    RegisterInfo* info = mir_to_lir_->GetRegInfo(reg);
+
+    // Make sure we don't have promoted register here.
+    DCHECK(info->IsTemp());
+
+    temp_regs_.push_back(reg);
+    mir_to_lir_->FlushReg(reg);
+    // For a register pair, the partner is recorded and flushed as well.
+    if (reg.IsPair()) {
+      RegStorage partner = info->Partner();
+      temp_regs_.push_back(partner);
+      mir_to_lir_->FlushReg(partner);
+    }
+
+    mir_to_lir_->Clobber(reg);
+    mir_to_lir_->LockTemp(reg);
+  }
+  va_end(regs);
+}
+
+/*
+ * Release each register recorded by the constructor.
+ */
+X86Mir2Lir::ExplicitTempRegisterLock::~ExplicitTempRegisterLock() {
+  // Walk the recorded registers in insertion order and free them one by one.
+  for (size_t idx = 0; idx < temp_regs_.size(); ++idx) {
+    mir_to_lir_->FreeTemp(temp_regs_[idx]);
+  }
+}
+
 }  // namespace art
commit	6dccdc2511c9f22d3cc2ea83386ce9db2688fa19	[log] [tgz]
author	Maxim Kazantsev <maxim.kazantsev@intel.com>	Mon Aug 18 18:43:55 2014 +0700
committer	Ian Rogers <irogers@google.com>	Thu Sep 04 08:47:13 2014 -0700
tree	a6ec44c6e2351826962cd3d08560e64fed2c218b
parent	2fb54e875697126de075ae736fd6cb136b7b4f94 [diff]