Codegen tweaks
Minor codegen cleanup. The most significant part of this change is
fixing Dalvik register use counting so that it correctly records the
cost of the high word of a register pair. Significant boost to the
Reversi benchmark; modest gain for CaffeineMark.
Change-Id: I41819e6d7be93e62d259240269339a94a934f312
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index 2736215..fbc48d4 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -137,10 +137,24 @@
switch(ccode) {
case kCondEq:
- OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, not_taken);
- break;
case kCondNe:
- OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, taken);
+ LIR* target;
+ ConditionCode condition;
+ if (ccode == kCondEq) {
+ target = not_taken;
+ condition = kCondEq;
+ } else {
+ target = taken;
+ condition = kCondNe;
+ }
+ if (val == 0) {
+ int t_reg = AllocTemp(cu);
+ NewLIR4(cu, kThumb2OrrRRRs, t_reg, low_reg, high_reg, 0);
+ FreeTemp(cu, t_reg);
+ OpCondBranch(cu, condition, taken);
+ return;
+ }
+ OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, target);
break;
case kCondLt:
OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index a670199..d6ef6e5 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -500,6 +500,10 @@
alt_opcode = kThumb2AddRRR;
}
break;
+ case kOpRsub:
+ opcode = kThumb2RsubRRI8;
+ alt_opcode = kThumb2RsubRRR;
+ break;
case kOpAdc:
opcode = kThumb2AdcRRI8;
alt_opcode = kThumb2AdcRRR;
diff --git a/src/compiler/codegen/gen_common.cc b/src/compiler/codegen/gen_common.cc
index 0a46593..2eaa6b0 100644
--- a/src/compiler/codegen/gen_common.cc
+++ b/src/compiler/codegen/gen_common.cc
@@ -1419,13 +1419,14 @@
switch (opcode) {
case Instruction::RSUB_INT_LIT8:
case Instruction::RSUB_INT: {
- int t_reg;
- //TUNING: add support for use of Arm rsub op
rl_src = LoadValue(cu, rl_src, kCoreReg);
- t_reg = AllocTemp(cu);
- LoadConstant(cu, t_reg, lit);
rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
- OpRegRegReg(cu, kOpSub, rl_result.low_reg, t_reg, rl_src.low_reg);
+ if (cu->instruction_set == kThumb2) {
+ OpRegRegImm(cu, kOpRsub, rl_result.low_reg, rl_src.low_reg, lit);
+ } else {
+ OpRegReg(cu, kOpNeg, rl_result.low_reg, rl_src.low_reg);
+ OpRegImm(cu, kOpAdd, rl_result.low_reg, lit);
+ }
StoreValue(cu, rl_dest, rl_result);
return;
}
diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc
index 1d5f3ac..3a3aeba 100644
--- a/src/compiler/codegen/ralloc_util.cc
+++ b/src/compiler/codegen/ralloc_util.cc
@@ -1082,27 +1082,22 @@
static void CountRefs(CompilationUnit *cu, BasicBlock* bb, RefCounts* core_counts,
RefCounts* fp_counts)
{
+ // TUNING: this routine could use some tweaking.
if ((cu->disable_opt & (1 << kPromoteRegs)) ||
!((bb->block_type == kEntryBlock) || (bb->block_type == kExitBlock) ||
(bb->block_type == kDalvikByteCode))) {
return;
}
- for (int i = 0; i < cu->num_ssa_regs;) {
+ for (int i = 0; i < cu->num_ssa_regs; i++) {
RegLocation loc = cu->reg_location[i];
RefCounts* counts = loc.fp ? fp_counts : core_counts;
int p_map_idx = SRegToPMap(cu, loc.s_reg_low);
//Don't count easily regenerated immediates
- if (loc.fp || loc.wide || !IsInexpensiveConstant(cu, loc)) {
- counts[p_map_idx].count += cu->use_counts.elem_list[i];
+ if (loc.fp || !IsInexpensiveConstant(cu, loc)) {
+ counts[p_map_idx].count += cu->raw_use_counts.elem_list[i];
}
- if (loc.wide) {
- if (loc.fp) {
- counts[p_map_idx].double_start = true;
- counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
- }
- i += 2;
- } else {
- i++;
+ if (loc.wide && loc.fp && !loc.high_word) {
+ counts[p_map_idx].double_start = true;
}
}
}