ART: Reduce LockCallTemps usage
Using FlushAllRegs/LockCallTemps in integer arithmetic causes
excess register flushing and clobbering. This patch adds an API that
allows flushing, clobbering and locking only those registers we really
need for calculations.
Change-Id: Idabaa4fff4d18a33e5040a80f66f2df6432f8be0
Signed-off-by: Max Kazantsev <maxim.kazantsev@intel.com>
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index a85e02f..dd4d661 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -22,6 +22,7 @@
#include "x86_lir.h"
#include <map>
+#include <vector>
namespace art {
@@ -61,6 +62,27 @@
bool initialized_;
};
+  /**
+   * Scoped lock over an explicit set of temp registers.
+   *
+   * The constructor takes n_regs followed by that many RegStorage pointers; the
+   * destructor frees the registers again. Instances must be named locals so that
+   * they stay alive for as long as the registers are in use.
+   */
+  class ExplicitTempRegisterLock {
+   public:
+    ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...);
+    ~ExplicitTempRegisterLock();
+
+    // Not copyable: a copy would free the same registers a second time.
+    ExplicitTempRegisterLock(const ExplicitTempRegisterLock&) = delete;
+    ExplicitTempRegisterLock& operator=(const ExplicitTempRegisterLock&) = delete;
+
+   protected:
+    std::vector<RegStorage> temp_regs_;  // Registers passed to FreeTemp() on destruction.
+    X86Mir2Lir* const mir_to_lir_;       // Object whose FreeTemp() is called on destruction.
+  };
+
public:
X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 196155e..a745339 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -49,8 +49,8 @@
return;
}
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage
+ // Prepare for explicit register usage (held until the named lock goes out of scope)
+ ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
LoadValueDirectWideFixed(rl_src1, r_tmp1);
@@ -398,8 +398,8 @@
return;
}
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage
+ // Prepare for explicit register usage (held until the named lock goes out of scope)
+ ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3);
RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
LoadValueDirectWideFixed(rl_src1, r_tmp1);
@@ -768,8 +768,9 @@
RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2, bool is_div, bool check_zero) {
// We have to use fixed registers, so flush all the temps.
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage.
+
+ // Prepare for explicit register usage (held until the named lock goes out of scope).
+ ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
// Load LHS into EAX.
LoadValueDirectFixed(rl_src1, rs_r0);
@@ -791,11 +792,11 @@
// Have to catch 0x80000000/-1 case, or we will get an exception!
OpRegImm(kOpCmp, rs_r1, -1);
- LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+ LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
// RHS is -1.
OpRegImm(kOpCmp, rs_r0, 0x80000000);
- LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+ LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
branch->target = NewLIR0(kPseudoTargetLabel);
@@ -1606,8 +1607,8 @@
if (!cu_->target64) {
int32_t val_lo = Low32Bits(val);
int32_t val_hi = High32Bits(val);
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage.
+ // Prepare for explicit register usage (held until the named lock goes out of scope).
+ ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
bool src1_in_reg = rl_src1.location == kLocPhysReg;
int displacement = SRegOffset(rl_src1.s_reg_low);
@@ -1690,8 +1691,8 @@
bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
mir_graph_->SRegToVReg(rl_src2.s_reg_low);
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage.
+ // Prepare for explicit register usage (held until the named lock goes out of scope).
+ ExplicitTempRegisterLock temp_lock(this, 3, &rs_r0, &rs_r1, &rs_r2);
rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
@@ -1714,7 +1715,7 @@
NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
} else {
int displacement = SRegOffset(rl_src2.s_reg_low);
- LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
+ LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
displacement + LOWORD_OFFSET);
AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
true /* is_load */, true /* is_64bit */);
@@ -2154,8 +2155,8 @@
}
// We have to use fixed registers, so flush all the temps.
- FlushAllRegs();
- LockCallTemps(); // Prepare for explicit register usage.
+ // Prepare for explicit register usage (held until the named lock goes out of scope).
+ ExplicitTempRegisterLock temp_lock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q);
// Load LHS into RAX.
LoadValueDirectWideFixed(rl_src1, rs_r0q);
@@ -2171,7 +2172,7 @@
// Have to catch 0x8000000000000000/-1 case, or we will get an exception!
NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
- LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+ LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
// RHS is -1.
LoadConstantWide(rs_r6q, 0x8000000000000000);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ffe6702..aadb41a 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -14,8 +14,9 @@
* limitations under the License.
*/
-#include <string>
+#include <cstdarg>
#include <inttypes.h>
+#include <string>
#include "backend_x86.h"
#include "codegen_x86.h"
@@ -2919,4 +2920,59 @@
return true;
}
+/**
+ * Flush, clobber and lock the given temp registers for explicit usage.
+ *
+ * @param mir_to_lir object whose register helpers (FlushReg, Clobber, LockTemp) are called.
+ * @param n_regs number of variadic arguments that follow.
+ * @param ... exactly n_regs pointers to the RegStorage values to lock.
+ *
+ * Every register recorded here is passed to FreeTemp() in the destructor.
+ */
+X86Mir2Lir::ExplicitTempRegisterLock::ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir,
+                                                               int n_regs, ...) :
+    temp_regs_(),
+    mir_to_lir_(mir_to_lir) {
+  // Start with an empty list: size-constructing the vector would leave n_regs
+  // default-constructed entries in front of the real ones, and the destructor
+  // would then pass them to FreeTemp() as well.
+  if (n_regs > 0) {
+    temp_regs_.reserve(static_cast<size_t>(n_regs));
+  }
+
+  va_list regs;
+  va_start(regs, n_regs);
+  for (int i = 0; i < n_regs; i++) {
+    RegStorage reg = *(va_arg(regs, RegStorage*));
+    RegisterInfo* info = mir_to_lir_->GetRegInfo(reg);
+
+    // Make sure we don't have promoted register here.
+    DCHECK(info->IsTemp());
+
+    temp_regs_.push_back(reg);
+    mir_to_lir_->FlushReg(reg);
+
+    // For a register pair, also record and flush the partner register.
+    if (reg.IsPair()) {
+      RegStorage partner = info->Partner();
+      temp_regs_.push_back(partner);
+      mir_to_lir_->FlushReg(partner);
+    }
+
+    mir_to_lir_->Clobber(reg);
+    mir_to_lir_->LockTemp(reg);
+  }
+
+  va_end(regs);
+}
+
+/**
+ * Free every register recorded by the constructor.
+ */
+X86Mir2Lir::ExplicitTempRegisterLock::~ExplicitTempRegisterLock() {
+  for (const RegStorage& reg : temp_regs_) {
+    mir_to_lir_->FreeTemp(reg);
+  }
+}
+
} // namespace art