Optimizing/ARM: Improve long shifts by 1.
Implement long
Shl(x,1) as LSLS+ADC,
Shr(x,1) as ASR+RRX and
UShr(x,1) as LSR+RRX.
Remove the simplification substituting Shl(x,1) with
ADD(x,x) as it interferes with some other optimizations
instead of helping them. And since it didn't help 64-bit
architectures anyway, codegen is the correct place for it.
This is now implemented for ARM and x86, so only mips32 can
be improved.
Change-Id: Idd14f23292198b2260189e1497ca5411b21743b3
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 12ab68e..c7e412c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3361,7 +3361,19 @@
__ mov(o_l, ShifterOperand(high));
__ LoadImmediate(o_h, 0);
}
- } else { // shift_value < 32
+ } else if (shift_value == 1) {
+ if (op->IsShl()) {
+ __ Lsls(o_l, low, 1);
+ __ adc(o_h, high, ShifterOperand(high));
+ } else if (op->IsShr()) {
+ __ Asrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ } else {
+ __ Lsrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ }
+ } else {
+ DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
if (op->IsShl()) {
__ Lsl(o_h, high, shift_value);
__ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a..b36870c 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -169,16 +169,6 @@
// src
instruction->ReplaceWith(input_other);
instruction->GetBlock()->RemoveInstruction(instruction);
- } else if (instruction->IsShl() && input_cst->IsOne()) {
- // Replace Shl looking like
- // SHL dst, src, 1
- // with
- // ADD dst, src, src
- HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
- input_other,
- input_other);
- instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
- RecordSimplification();
}
}
}