MIPS: Use Lsa/Dlsa when possible.
For MIPS32R6, replace instances of "sll/addu" used to calculate the
address of an item in an array with "lsa". For other versions of
MIPS32 use the "sll/addu" sequence. Encapsulate this logic in an
assembler method to eliminate having a lot of statements like
"if (IsR6()) { ... } else { ... }" scattered throughout the code.
MIPS64 always supports R6. This means that all instances of
"dsll/daddu" used to calculate the address of an item in an array
can be replaced by "dlsa" so there is no need to encapsulate
conditional logic in a special method. The code can just emit
"dlsa" directly, except where the scale factor may be TIMES_1, in
which case a plain "daddu" is emitted instead.
Test: mma -j2 ART_TEST_OPTIMIZING=true test-art-target-run-test
Tested on MIPS32, and MIPS64 QEMU.
Test: "make test-art-target-gtest32" on CI20 board.
Test: "cd art; test/testrunner/testrunner.py --target --optimizing --32"
on CI20 board.
Change-Id: Ibe5facc1bc2a6a7a6584e23d3a48e163ae38077d
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78b31e9..19250c6 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2059,8 +2059,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
__ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2);
__ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
@@ -2107,13 +2106,11 @@
__ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
__ Bc(&done);
__ Bind(&uncompressed_load);
- __ Dsll(TMP, index_reg, TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index_reg, obj, TIMES_2);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
__ Bind(&done);
} else {
- __ Dsll(TMP, index_reg, TIMES_2);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index_reg, obj, TIMES_2);
__ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
}
@@ -2129,8 +2126,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadFromOffset(load_type, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
}
break;
@@ -2164,8 +2160,7 @@
// reference, if heap poisoning is enabled).
codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
} else {
- __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Addu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
// If read barriers are enabled, emit read barriers other than
// Baker's using a slow path (and also unpoison the loaded
@@ -2188,8 +2183,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8);
__ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
@@ -2202,8 +2196,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
__ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_4);
__ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
@@ -2216,8 +2209,7 @@
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(TMP, obj, TMP);
+ __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_8);
__ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
@@ -2330,8 +2322,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_2);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2348,8 +2339,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2368,8 +2358,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
DCHECK_EQ(value, 0);
@@ -2399,8 +2388,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
__ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
__ Bc(&done);
@@ -2458,8 +2446,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
__ StoreToOffset(kStoreWord, source, base_reg, data_offset);
@@ -2484,8 +2471,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -2502,8 +2488,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_4);
}
if (value_location.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
@@ -2520,8 +2505,7 @@
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
} else {
- __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
- __ Daddu(base_reg, obj, base_reg);
+ __ Dlsa(base_reg, index.AsRegister<GpuRegister>(), obj, TIMES_8);
}
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
@@ -4447,8 +4431,11 @@
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
} else {
GpuRegister index_reg = index.AsRegister<GpuRegister>();
- __ Dsll(TMP, index_reg, scale_factor);
- __ Daddu(TMP, obj, TMP);
+ if (scale_factor == TIMES_1) {
+ __ Daddu(TMP, index_reg, obj);
+ } else {
+ __ Dlsa(TMP, index_reg, obj, scale_factor);
+ }
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
}
} else {
@@ -6203,8 +6190,7 @@
// We are in the range of the table.
// Load the target address from the jump table, indexing by the value.
__ LoadLabelAddress(AT, table->GetLabel());
- __ Sll(TMP, TMP, 2);
- __ Daddu(TMP, TMP, AT);
+ __ Dlsa(TMP, TMP, AT, 2);
__ Lw(TMP, TMP, 0);
// Compute the absolute target address by adding the table start address
// (the table contains offsets to targets relative to its start).