ARM: Combine multiply accumulate operations.
Try to combine integer multiply and add(sub) into a MAC operation.
For AArch64, also try to combine long type multiply and add(sub).
Change-Id: Ic85812e941eb5a66abc355cab81a4dd16de1b66e
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index b9d9a11..5d09ae1 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -481,10 +481,10 @@
kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0].
- kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
- kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
- kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
+ kThumb2LslRRI5, // lsl [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [00] rm[3..0].
+ kThumb2LsrRRI5, // lsr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [01] rm[3..0].
+ kThumb2AsrRRI5, // asr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [10] rm[3..0].
+ kThumb2RorRRI5, // ror [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [11] rm[3..0].
kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
@@ -512,7 +512,8 @@
kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
- kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0].
+ kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[11-8] [0000] rm[3-0].
+ kThumb2Mls, // mls [111110110000] rn[19-16] ra[15-12] rd[11-8] [0001] rm[3-0].
kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index de93e26..65fb3cd 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -896,6 +896,10 @@
kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
"mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
+ ENCODING_MAP(kThumb2Mls, 0xfb000010,
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+ kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
+ "mls", "!0C, !1C, !2C, !3C", 4, kFixupNone),
ENCODING_MAP(kThumb2Umull, 0xfba00000,
kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
kFmtBitBlt, 3, 0,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 0ae7ee3..fa8dfe3 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -182,6 +182,8 @@
void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+ void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub);
// Required for target - single operation generators.
LIR* OpUnconditionalBranch(LIR* target);
@@ -259,6 +261,8 @@
LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
size_t GetInstructionOffset(LIR* lir);
+ void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE;
+
private:
void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1a7b439..fe1d126 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1075,6 +1075,17 @@
return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
}
+void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub) {
+ rl_src1 = LoadValue(rl_src1, kCoreReg);
+ rl_src2 = LoadValue(rl_src2, kCoreReg);
+ rl_src3 = LoadValue(rl_src3, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
+ rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
+ StoreValue(rl_dest, rl_result);
+}
+
void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit,
int first_bit, int second_bit) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 7190a49..d374353 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -948,4 +948,30 @@
return count;
}
+void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+ UNUSED(bb);
+ DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
+ RegLocation rl_src[3];
+ RegLocation rl_dest = mir_graph_->GetBadLoc();
+ rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
+ switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+ case kMirOpMaddInt:
+ rl_dest = mir_graph_->GetDest(mir);
+ rl_src[0] = mir_graph_->GetSrc(mir, 0);
+ rl_src[1] = mir_graph_->GetSrc(mir, 1);
+ rl_src[2]= mir_graph_->GetSrc(mir, 2);
+ GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], false);
+ break;
+ case kMirOpMsubInt:
+ rl_dest = mir_graph_->GetDest(mir);
+ rl_src[0] = mir_graph_->GetSrc(mir, 0);
+ rl_src[1] = mir_graph_->GetSrc(mir, 1);
+ rl_src[2]= mir_graph_->GetSrc(mir, 2);
+ GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], true);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode;
+ }
+}
+
} // namespace art