ARM: Combine multiply accumulate operations.
Try to combine integer multiply and add(sub) into a MAC operation.
For AArch64, also try to combine long type multiply and add(sub).
Change-Id: Ic85812e941eb5a66abc355cab81a4dd16de1b66e
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index b9d9a11..5d09ae1 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -481,10 +481,10 @@
kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0].
- kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
- kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
- kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
+ kThumb2LslRRI5, // lsl [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [00] rm[3..0].
+ kThumb2LsrRRI5, // lsr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [01] rm[3..0].
+ kThumb2AsrRRI5, // asr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [10] rm[3..0].
+ kThumb2RorRRI5, // ror [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [11] rm[3..0].
kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
@@ -512,7 +512,8 @@
kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
- kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0].
+ kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[11-8] [0000] rm[3-0].
+ kThumb2Mls, // mls [111110110000] rn[19-16] ra[15-12] rd[11-8] [0001] rm[3-0].
kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index de93e26..65fb3cd 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -896,6 +896,10 @@
kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
"mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
+ ENCODING_MAP(kThumb2Mls, 0xfb000010,
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+ kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
+ "mls", "!0C, !1C, !2C, !3C", 4, kFixupNone),
ENCODING_MAP(kThumb2Umull, 0xfba00000,
kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
kFmtBitBlt, 3, 0,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 0ae7ee3..fa8dfe3 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -182,6 +182,8 @@
void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+ void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub);
// Required for target - single operation generators.
LIR* OpUnconditionalBranch(LIR* target);
@@ -259,6 +261,8 @@
LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
size_t GetInstructionOffset(LIR* lir);
+ void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE;
+
private:
void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1a7b439..fe1d126 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1075,6 +1075,17 @@
return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
}
+void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub) {
+ rl_src1 = LoadValue(rl_src1, kCoreReg);
+ rl_src2 = LoadValue(rl_src2, kCoreReg);
+ rl_src3 = LoadValue(rl_src3, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
+ rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
+ StoreValue(rl_dest, rl_result);
+}
+
void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit,
int first_bit, int second_bit) {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 7190a49..d374353 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -948,4 +948,30 @@
return count;
}
+void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+ UNUSED(bb);
+ DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
+ RegLocation rl_src[3];
+ RegLocation rl_dest = mir_graph_->GetBadLoc();
+ rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
+ switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+ case kMirOpMaddInt:
+ rl_dest = mir_graph_->GetDest(mir);
+ rl_src[0] = mir_graph_->GetSrc(mir, 0);
+ rl_src[1] = mir_graph_->GetSrc(mir, 1);
+ rl_src[2]= mir_graph_->GetSrc(mir, 2);
+ GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], false);
+ break;
+ case kMirOpMsubInt:
+ rl_dest = mir_graph_->GetDest(mir);
+ rl_src[0] = mir_graph_->GetSrc(mir, 0);
+ rl_src[1] = mir_graph_->GetSrc(mir, 1);
+ rl_src[2]= mir_graph_->GetSrc(mir, 2);
+ GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], true);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode;
+ }
+}
+
} // namespace art
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index f8a7310..943c5c1 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -312,6 +312,7 @@
kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
+ kA64Madd4rrrr, // madd[s0011011000] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
kA64Movk3rdM, // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0].
kA64Movn3rdM, // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0].
kA64Movz3rdM, // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index cab11cc..1fed5da 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -445,6 +445,10 @@
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
"lsr", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Madd4rrrr), SF_VARIANTS(0x1b000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP,
+ "madd", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769),
ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 766ac23..55866e2 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -193,6 +193,10 @@
void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+ void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub);
+ void GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub);
// Required for target - single operation generators.
LIR* OpUnconditionalBranch(LIR* target) OVERRIDE;
@@ -226,6 +230,8 @@
bool InexpensiveConstantLong(int64_t value) OVERRIDE;
bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
+ void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE;
+
bool WideGPRsAreAliases() const OVERRIDE {
return true; // 64b architecture.
}
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 57e67d5..5ac2aa0 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -949,10 +949,33 @@
UNREACHABLE();
}
+void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub) {
+ rl_src1 = LoadValue(rl_src1, kCoreReg);
+ rl_src2 = LoadValue(rl_src2, kCoreReg);
+ rl_src3 = LoadValue(rl_src3, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
+ rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
+ StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+ RegLocation rl_src3, bool is_sub) {
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+ rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+ rl_src3 = LoadValueWide(rl_src3, kCoreReg);
+ RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+ NewLIR4(is_sub ? WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(),
+ rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
+ StoreValueWide(rl_dest, rl_result);
+}
+
void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
int first_bit, int second_bit) {
- OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit));
+ OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
+ EncodeShift(kA64Lsl, second_bit - first_bit));
if (first_bit != 0) {
OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
}
@@ -1686,7 +1709,8 @@
RegLocation rl_src_i = info->args[0];
RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info); // result reg
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- RegLocation rl_i = IsWide(size) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
+ RegLocation rl_i = IsWide(size) ?
+ LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
return true;
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index e7fa8ed..030c5ed 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -849,4 +849,35 @@
return count;
}
+void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+ UNUSED(bb);
+ DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode));
+ RegLocation rl_src[3];
+ RegLocation rl_dest = mir_graph_->GetBadLoc();
+ rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc();
+ ExtendedMIROpcode opcode = static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode);
+ switch (opcode) {
+ case kMirOpMaddInt:
+ case kMirOpMsubInt:
+ rl_dest = mir_graph_->GetDest(mir);
+ rl_src[0] = mir_graph_->GetSrc(mir, 0);
+ rl_src[1] = mir_graph_->GetSrc(mir, 1);
+ rl_src[2]= mir_graph_->GetSrc(mir, 2);
+ GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2],
+ (opcode == kMirOpMsubInt) ? true : false);
+ break;
+ case kMirOpMaddLong:
+ case kMirOpMsubLong:
+ rl_dest = mir_graph_->GetDestWide(mir);
+ rl_src[0] = mir_graph_->GetSrcWide(mir, 0);
+ rl_src[1] = mir_graph_->GetSrcWide(mir, 2);
+ rl_src[2] = mir_graph_->GetSrcWide(mir, 4);
+ GenMaddMsubLong(rl_dest, rl_src[0], rl_src[1], rl_src[2],
+ (opcode == kMirOpMsubLong) ? true : false);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected opcode: " << static_cast<int>(opcode);
+ }
+}
+
} // namespace art