MIPS: Use Lsa/Dlsa when possible.
For MIPS32R6, replace instances of the "sll/addu" sequence used to
calculate the address of an item in an array with "lsa". For other
versions of MIPS32 use the "sll/addu" sequence. Encapsulate this logic in an
assembler method to eliminate having a lot of statements like
"if (IsR6()) { ... } else { ... }" scattered throughout the code.
MIPS64 always supports R6. This means that all instances of
"dsll/daddu" used to calculate the address of an item in an array
can be replaced by "dlsa" so there is no need to encapsulate
conditional logic in a special method. The code can just emit
"dlsa" directly.
Test: mma -j2 ART_TEST_OPTIMIZING=true test-art-target-run-test
Tested on MIPS32 and MIPS64 QEMU.
Test: "make test-art-target-gtest32" on CI20 board.
Test: "cd art; test/testrunner/testrunner.py --target --optimizing --32"
on CI20 board.
Change-Id: Ibe5facc1bc2a6a7a6584e23d3a48e163ae38077d
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 287891f..aa030b2 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2509,8 +2509,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
__ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP);
__ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
@@ -2557,13 +2556,11 @@
__ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
__ B(&done);
__ Bind(&uncompressed_load);
- __ Sll(TMP, index_reg, TIMES_2);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
__ Bind(&done);
} else {
- __ Sll(TMP, index_reg, TIMES_2);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
}
@@ -2578,8 +2575,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP);
__ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
@@ -2613,8 +2609,7 @@
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP);
__ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
@@ -2637,8 +2632,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP);
__ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
@@ -2651,8 +2645,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadSFromOffset(out, obj, offset, null_checker);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP);
__ LoadSFromOffset(out, TMP, data_offset, null_checker);
}
break;
@@ -2665,8 +2658,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadDFromOffset(out, obj, offset, null_checker);
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_8);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP);
__ LoadDFromOffset(out, TMP, data_offset, null_checker);
}
break;
@@ -2779,8 +2771,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_2);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_2, base_reg);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2797,8 +2788,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2817,8 +2807,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
}
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
DCHECK_EQ(value, 0);
@@ -2848,8 +2837,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
}
__ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
__ B(&done);
@@ -2907,8 +2895,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
}
__ StoreToOffset(kStoreWord, source, base_reg, data_offset);
@@ -2933,8 +2920,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -2951,8 +2937,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2969,8 +2954,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Sll(base_reg, index.AsRegister<Register>(), TIMES_8);
- __ Addu(base_reg, obj, base_reg);
+ __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -6351,8 +6335,7 @@
Register index_reg = index.IsRegisterPair()
? index.AsRegisterPairLow<Register>()
: index.AsRegister<Register>();
- __ Sll(TMP, index_reg, scale_factor);
- __ Addu(TMP, obj, TMP);
+ __ ShiftAndAdd(TMP, index_reg, obj, scale_factor, TMP);
__ LoadFromOffset(kLoadWord, ref_reg, TMP, offset);
}
} else {
@@ -8446,8 +8429,7 @@
// We are in the range of the table.
// Load the target address from the jump table, indexing by the value.
__ LoadLabelAddress(AT, constant_area, table->GetLabel());
- __ Sll(TMP, TMP, 2);
- __ Addu(TMP, TMP, AT);
+ __ ShiftAndAdd(TMP, TMP, AT, 2, TMP);
__ Lw(TMP, TMP, 0);
// Compute the absolute target address by adding the table start address
// (the table contains offsets to targets relative to its start).
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78b31e9..19250c6 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2059,8 +2059,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
__ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2);
__ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
@@ -2107,13 +2106,11 @@
__ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
__ Bc(&done);
__ Bind(&uncompressed_load);
- __ Dsll(TMP, index_reg, TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index_reg, obj, TIMES_2);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
__ Bind(&done);
} else {
- __ Dsll(TMP, index_reg, TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index_reg, obj, TIMES_2);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
}
@@ -2129,8 +2126,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadFromOffset(load_type, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
}
break;
@@ -2164,8 +2160,7 @@
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
@@ -2188,8 +2183,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8);
__ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
@@ -2202,8 +2196,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
@@ -2216,8 +2209,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8);
__ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
@@ -2330,8 +2322,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_2);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2348,8 +2339,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2368,8 +2358,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
DCHECK_EQ(value, 0);
@@ -2399,8 +2388,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
__ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
__ Bc(&done);
@@ -2458,8 +2446,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
__ StoreToOffset(kStoreWord, source, base_reg, data_offset);
@@ -2484,8 +2471,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -2502,8 +2488,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2520,8 +2505,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -4447,8 +4431,11 @@
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
} else {
GpuRegister index_reg = index.AsRegister<GpuRegister>();
- __ Dsll(TMP, index_reg, scale_factor);
- __ Daddu(TMP, obj, TMP);
+ if (scale_factor == TIMES_1) {
+ __ Daddu(TMP, index_reg, obj);
+ } else {
+ __ Dlsa(TMP, index_reg, obj, scale_factor);
+ }
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
}
} else {
@@ -6203,8 +6190,7 @@
// We are in the range of the table.
// Load the target address from the jump table, indexing by the value.
__ LoadLabelAddress(AT, table->GetLabel());
- __ Sll(TMP, TMP, 2);
- __ Daddu(TMP, TMP, AT);
+ __ Dlsa(TMP, TMP, AT, 2);
__ Lw(TMP, TMP, 0);
// Compute the absolute target address by adding the table start address
// (the table contains offsets to targets relative to its start).
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index b67793c..900b00e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2701,12 +2701,7 @@
// Calculate destination address.
__ Addiu(dstPtr, dstObj, data_offset);
- if (IsR6()) {
- __ Lsa(dstPtr, dstBegin, dstPtr, char_shift);
- } else {
- __ Sll(AT, dstBegin, char_shift);
- __ Addu(dstPtr, dstPtr, AT);
- }
+ __ ShiftAndAdd(dstPtr, dstBegin, dstPtr, char_shift);
if (mirror::kUseStringCompression) {
MipsLabel uncompressed_copy, compressed_loop;
@@ -2734,12 +2729,7 @@
// Calculate source address.
__ Addiu(srcPtr, srcObj, value_offset);
- if (IsR6()) {
- __ Lsa(srcPtr, srcBegin, srcPtr, char_shift);
- } else {
- __ Sll(AT, srcBegin, char_shift);
- __ Addu(srcPtr, srcPtr, AT);
- }
+ __ ShiftAndAdd(srcPtr, srcBegin, srcPtr, char_shift);
__ Bind(&loop);
__ Lh(AT, srcPtr, 0);
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2e2231b..a99d02d 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -635,6 +635,7 @@
DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt);
}
+// TODO: This instruction is available in both R6 and MSA and it should be used when available.
void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) {
CHECK(IsR6());
CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne;
@@ -642,6 +643,24 @@
DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt);
}
+void MipsAssembler::ShiftAndAdd(Register dst,
+ Register src_idx,
+ Register src_base,
+ int shamt,
+ Register tmp) {
+ CHECK(0 <= shamt && shamt <= 4) << shamt;
+ CHECK_NE(src_base, tmp);
+ if (shamt == TIMES_1) {
+ // Catch the special case where the shift amount is zero (0).
+ Addu(dst, src_base, src_idx);
+ } else if (IsR6()) {
+ Lsa(dst, src_idx, src_base, shamt);
+ } else {
+ Sll(tmp, src_idx, shamt);
+ Addu(dst, src_base, tmp);
+ }
+}
+
void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
DsFsmInstrRrr(EmitI(0x20, rs, rt, imm16), rt, rs, rs);
}
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1a5a23d..463daeb 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -263,6 +263,7 @@
void Ext(Register rd, Register rt, int pos, int size); // R2+
void Ins(Register rd, Register rt, int pos, int size); // R2+
void Lsa(Register rd, Register rs, Register rt, int saPlusOne); // R6
+ void ShiftAndAdd(Register dst, Register src_idx, Register src_base, int shamt, Register tmp = AT);
void Lb(Register rt, Register rs, uint16_t imm16);
void Lh(Register rt, Register rs, uint16_t imm16);