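MIPS32: Fill branch delay slots

Stop emitting an unconditional Nop() after Jalr(T9): those call sites
now use NopIfNoReordering(). The sequences for entrypoint calls,
PC-relative patches and the frame exit are bracketed with
SetReorder(false) and a restore of the previous setting. On frame exit
the stack pointer adjustment is placed in the delay slot of "jr ra"
when the frame size passes the IsInt<16> check. The native PC recorded
for the mapping table now comes from CodePosition() instead of
CodeSize(), and the expected MIPS output in the CFI test is updated to
match.

Test: booted MIPS32 in QEMU
Test: test-art-host-gtest
Test: test-art-target-gtest
Test: test-art-target-run-test-optimizing on CI20
Change-Id: I727e80753395ab99fff004cb5d2e0a06409150d7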
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0d3f849..b0de964 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -753,7 +753,7 @@
}
// Collect PC infos for the mapping table.
- uint32_t native_pc = GetAssembler()->CodeSize();
+ uint32_t native_pc = GetAssembler()->CodePosition();
if (instruction == nullptr) {
// For stack overflow checks and native-debug-info entries without dex register
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8a2f90d..e0de03b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -792,12 +792,24 @@
// TODO: __ cfi().Restore(DWARFReg(reg));
}
- __ DecreaseFrameSize(GetFrameSize());
+ size_t frame_size = GetFrameSize();
+ // Adjust the stack pointer in the delay slot if doing so doesn't break CFI.
+ bool exchange = IsInt<16>(static_cast<int32_t>(frame_size));
+ bool reordering = __ SetReorder(false);
+ if (exchange) {
+ __ Jr(RA);
+ __ DecreaseFrameSize(frame_size); // Single instruction in delay slot.
+ } else {
+ __ DecreaseFrameSize(frame_size);
+ __ Jr(RA);
+ __ Nop(); // In delay slot.
+ }
+ __ SetReorder(reordering);
+ } else {
+ __ Jr(RA);
+ __ NopIfNoReordering();
}
- __ Jr(RA);
- __ Nop();
-
__ cfi().RestoreState();
__ cfi().DefCFAOffset(GetFrameSize());
}
@@ -1251,6 +1263,7 @@
uint32_t dex_pc,
SlowPathCode* slow_path,
bool is_direct_entrypoint) {
+ bool reordering = __ SetReorder(false);
__ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
__ Jalr(T9);
if (is_direct_entrypoint) {
@@ -1262,6 +1275,7 @@
} else {
__ Nop(); // In delay slot.
}
+ __ SetReorder(reordering);
RecordPcInfo(instruction, dex_pc, slow_path);
}
@@ -3953,7 +3967,7 @@
__ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -4254,7 +4268,7 @@
// T9 prepared above for better instruction scheduling.
// T9()
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
// TODO: Implement this type.
@@ -4270,7 +4284,7 @@
kMipsPointerSize).Int32Value());
// T9()
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
break;
}
DCHECK(!IsLeafMethod());
@@ -4312,7 +4326,7 @@
__ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
}
void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4421,6 +4435,7 @@
DCHECK(!kEmitCompilerReadBarrier);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ bool reordering = __ SetReorder(false);
if (isR6) {
__ Bind(&info->high_label);
__ Bind(&info->pc_rel_label);
@@ -4436,6 +4451,7 @@
// Add a 32-bit offset to PC.
__ Addu(out, out, base_or_current_method_reg);
}
+ __ SetReorder(reordering);
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
@@ -4579,6 +4595,7 @@
DCHECK(!kEmitCompilerReadBarrier);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ bool reordering = __ SetReorder(false);
if (isR6) {
__ Bind(&info->high_label);
__ Bind(&info->pc_rel_label);
@@ -4594,6 +4611,7 @@
// Add a 32-bit offset to PC.
__ Addu(out, out, base_or_current_method_reg);
}
+ __ SetReorder(reordering);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
@@ -4851,7 +4869,7 @@
__ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
__ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
} else {
codegen_->InvokeRuntime(
@@ -5751,7 +5769,7 @@
Register reg = base->GetLocations()->Out().AsRegister<Register>();
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-
+ bool reordering = __ SetReorder(false);
if (codegen_->GetInstructionSetFeatures().IsR6()) {
__ Bind(&info->high_label);
__ Bind(&info->pc_rel_label);
@@ -5769,6 +5787,7 @@
__ Addu(reg, reg, RA);
// TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
}
+ __ SetReorder(reordering);
}
void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 6e5eb66..862a93f 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1901,7 +1901,7 @@
TR,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
__ Bind(slow_path->GetExitLabel());
}
@@ -2060,7 +2060,7 @@
TR,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
if (slow_path != nullptr) {
__ Bind(slow_path->GetExitLabel());
@@ -2146,7 +2146,7 @@
TR,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
@@ -2179,7 +2179,7 @@
TR,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -2208,7 +2208,7 @@
TR,
QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value());
__ Jalr(T9);
- __ Nop();
+ __ NopIfNoReordering();
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 05eb063..6c5030c 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -144,12 +144,12 @@
0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
- 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_cfi_kMips[] = {
0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
- 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48,
- 0x0B, 0x0E, 0x40,
+ 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
+ 0x0E, 0x40,
};
// 0x00000000: addiu r29, r29, -64
// 0x00000004: .cfi_def_cfa_offset: 64
@@ -171,12 +171,11 @@
// 0x00000028: .cfi_restore: r16
// 0x00000028: ldc1 f22, +40(r29)
// 0x0000002c: ldc1 f20, +32(r29)
-// 0x00000030: addiu r29, r29, 64
-// 0x00000034: .cfi_def_cfa_offset: 0
-// 0x00000034: jr r31
-// 0x00000038: nop
-// 0x0000003c: .cfi_restore_state
-// 0x0000003c: .cfi_def_cfa_offset: 64
+// 0x00000030: jr r31
+// 0x00000034: addiu r29, r29, 64
+// 0x00000038: .cfi_def_cfa_offset: 0
+// 0x00000038: .cfi_restore_state
+// 0x00000038: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64[] = {
0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
@@ -348,14 +347,13 @@
};
static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
- 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27,
- 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x09, 0x00, 0xE0, 0x03,
+ 0x40, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_cfi_kMips_adjust[] = {
0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
- 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
- 0x40,
+ 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
};
// 0x00000000: addiu r29, r29, -64
// 0x00000004: .cfi_def_cfa_offset: 64
@@ -392,12 +390,11 @@
// 0x00020054: .cfi_restore: r16
// 0x00020054: ldc1 f22, +40(r29)
// 0x00020058: ldc1 f20, +32(r29)
-// 0x0002005c: addiu r29, r29, 64
-// 0x00020060: .cfi_def_cfa_offset: 0
-// 0x00020060: jr r31
-// 0x00020064: nop
-// 0x00020068: .cfi_restore_state
-// 0x00020068: .cfi_def_cfa_offset: 64
+// 0x0002005c: jr r31
+// 0x00020060: addiu r29, r29, 64
+// 0x00020064: .cfi_def_cfa_offset: 0
+// 0x00020064: .cfi_restore_state
+// 0x00020064: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,