vixl32: do not use D14 as a temporary.
D14 is a callee-save register, which means we would need to
save it in the prologue, but at the point we're using it
the prologue has already been generated.
bug: 35977033
test: m ART_USE_VIXL_ARM_BACKEND=true test-art-target
Change-Id: Id7340ad9e87a9e527ce0989f45aae0b3a0963206
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index f5ada52..d75779c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2000,15 +2000,10 @@
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
- // Give d14 and d15 as scratch registers to VIXL.
- // They are removed from the register allocator in `SetupBlockedRegisters()`.
- // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
- // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
- // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
- // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
- // also want to investigate giving those 14 other D registers to the allocator.
- GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
- GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+ // Give D30 and D31 as scratch register to VIXL. The register allocator only works on
+ // S0-S31, which alias to D0-D15.
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
+ GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
}
void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
@@ -2074,13 +2069,6 @@
// Reserve temp register.
blocked_core_registers_[IP] = true;
- // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
- // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
- blocked_fpu_registers_[28] = true;
- blocked_fpu_registers_[29] = true;
- blocked_fpu_registers_[30] = true;
- blocked_fpu_registers_[31] = true;
-
if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
@@ -6549,13 +6537,16 @@
void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
// TODO(VIXL32): Double check the performance of this implementation.
UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
- vixl32::SRegister temp_1 = temps.AcquireS();
- vixl32::SRegister temp_2 = temps.AcquireS();
+ vixl32::Register temp1 = temps.Acquire();
+ ScratchRegisterScope ensure_scratch(
+ this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
+ vixl32::Register temp2(ensure_scratch.GetRegister());
- __ Vldr(temp_1, MemOperand(sp, mem1));
- __ Vldr(temp_2, MemOperand(sp, mem2));
- __ Vstr(temp_1, MemOperand(sp, mem2));
- __ Vstr(temp_2, MemOperand(sp, mem1));
+ int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+ GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
+ GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
+ GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
+ GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
}
void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
@@ -6578,7 +6569,7 @@
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
} else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
- vixl32::SRegister temp = temps.AcquireS();
+ vixl32::Register temp = temps.Acquire();
__ Vmov(temp, SRegisterFrom(source));
__ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
__ Vmov(SRegisterFrom(destination), temp);
@@ -6637,12 +6628,12 @@
}
}
-void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
+ __ Push(vixl32::Register(reg));
}
-void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
- TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
+ __ Pop(vixl32::Register(reg));
}
HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index e5eef37..6afc3dd 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -230,6 +230,7 @@
if (!CanHoldStoreOffsetThumb(type, offset)) {
CHECK_NE(base.GetCode(), kIpCode);
if ((reg.GetCode() != kIpCode) &&
+ (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
tmp_reg = temps.Acquire();
} else {
diff --git a/test/642-fp-callees/expected.txt b/test/642-fp-callees/expected.txt
new file mode 100644
index 0000000..77a1486
--- /dev/null
+++ b/test/642-fp-callees/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Done
diff --git a/test/642-fp-callees/fp_callees.cc b/test/642-fp-callees/fp_callees.cc
new file mode 100644
index 0000000..600f969
--- /dev/null
+++ b/test/642-fp-callees/fp_callees.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "jni.h"
+
+namespace art {
+
+// Make the array volatile, which is apparently making the C compiler
+// use FP registers in the method below.
+volatile double array[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+
+extern "C" JNIEXPORT void JNICALL Java_Main_holdFpTemporaries(JNIEnv* env, jclass cls) {
+ jmethodID mid = env->GetStaticMethodID(cls, "caller", "(IIJ)V");
+ CHECK(mid != nullptr);
+ // Load values from the arrays, which will be loaded in callee-save FP registers.
+ double a = array[0];
+ double b = array[1];
+ double c = array[2];
+ double d = array[3];
+ double e = array[4];
+ double f = array[5];
+ double g = array[6];
+ double h = array[7];
+ double i = array[8];
+ double j = array[9];
+ double k = array[10];
+ double l = array[11];
+ env->CallStaticVoidMethod(cls, mid, 1, 1, 1L);
+ // Load it in a temporary to please C compiler with bit_cast.
+ double temp = array[0];
+ CHECK_EQ(bit_cast<int64_t>(a), bit_cast<int64_t>(temp));
+ temp = array[1];
+ CHECK_EQ(bit_cast<int64_t>(b), bit_cast<int64_t>(temp));
+ temp = array[2];
+ CHECK_EQ(bit_cast<int64_t>(c), bit_cast<int64_t>(temp));
+ temp = array[3];
+ CHECK_EQ(bit_cast<int64_t>(d), bit_cast<int64_t>(temp));
+ temp = array[4];
+ CHECK_EQ(bit_cast<int64_t>(e), bit_cast<int64_t>(temp));
+ temp = array[5];
+ CHECK_EQ(bit_cast<int64_t>(f), bit_cast<int64_t>(temp));
+ temp = array[6];
+ CHECK_EQ(bit_cast<int64_t>(g), bit_cast<int64_t>(temp));
+ temp = array[7];
+ CHECK_EQ(bit_cast<int64_t>(h), bit_cast<int64_t>(temp));
+ temp = array[8];
+ CHECK_EQ(bit_cast<int64_t>(i), bit_cast<int64_t>(temp));
+ temp = array[9];
+ CHECK_EQ(bit_cast<int64_t>(j), bit_cast<int64_t>(temp));
+ temp = array[10];
+ CHECK_EQ(bit_cast<int64_t>(k), bit_cast<int64_t>(temp));
+ temp = array[11];
+ CHECK_EQ(bit_cast<int64_t>(l), bit_cast<int64_t>(temp));
+}
+
+} // namespace art
diff --git a/test/642-fp-callees/info.txt b/test/642-fp-callees/info.txt
new file mode 100644
index 0000000..d3e4bda
--- /dev/null
+++ b/test/642-fp-callees/info.txt
@@ -0,0 +1,2 @@
+Regression test for vixl32 backend, which used to incorrectly
+use D14 as a temporary register.
diff --git a/test/642-fp-callees/src/Main.java b/test/642-fp-callees/src/Main.java
new file mode 100644
index 0000000..fa57c93
--- /dev/null
+++ b/test/642-fp-callees/src/Main.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+ public static void main(String[] args) {
+ System.loadLibrary(args[0]);
+ holdFpTemporaries();
+ System.out.println("Done");
+ }
+
+ public static void caller(int a, int b, long c) {
+ $noinline$callee(a, b, c);
+ }
+
+ // This method is "no inline", in order to generate the
+ // bad floating point use at the call site.
+ public static void $noinline$callee(int a, int b, long c) {
+ }
+
+ public native static void holdFpTemporaries();
+}
diff --git a/test/Android.bp b/test/Android.bp
index 594cce2..9911af0 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -336,6 +336,7 @@
"596-monitor-inflation/monitor_inflation.cc",
"597-deopt-new-string/deopt.cc",
"626-const-class-linking/clear_dex_cache_types.cc",
+ "642-fp-callees/fp_callees.cc",
],
shared_libs: [
"libbacktrace",