author Vladimir Marko <vmarko@google.com> 2016-09-05 10:44:04 +0100
committer Vladimir Marko <vmarko@google.com> 2016-09-06 17:37:41 +0100
commit 239d6eaff0cbb5c4c0139f7053a012758799f186 (patch)
tree 8de26b30a2dfd94f849a38c8901437facbbc53b3
parent 9d185da3bef8caf015d3dbf4ad79c520af7ce3b1 (diff)
Change deoptimize entrypoint to save everything.
And implement FPU register retrieval from stack on x86.

On Nexus 9, AOSP ToT, the boot.oat size reduction is

prebuilt multi-part boot image:
  - 32-bit boot.oat: -20KiB (-0.03%)
  - 64-bit boot.oat: -45KiB (-0.06%)

on-device built single boot image:
  - 32-bit boot.oat: -24KiB (-0.04%)
  - 64-bit boot.oat: -36KiB (-0.05%)

Test: Run ART test suite on host and Nexus 9.
Bug: 30212852
Change-Id: I5d98e2a24363136d73dfec6100ab02f8eb101911
-rw-r--r--  compiler/optimizing/code_generator_arm.cc           2
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc         2
-rw-r--r--  compiler/optimizing/code_generator_mips.cc          2
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc        2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc           2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc        2
-rw-r--r--  runtime/arch/arm/quick_entrypoints_arm.S            2
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S        2
-rw-r--r--  runtime/arch/mips/quick_entrypoints_mips.S          2
-rw-r--r--  runtime/arch/mips64/quick_entrypoints_mips64.S      4
-rw-r--r--  runtime/arch/x86/quick_entrypoints_x86.S            2
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S      2
-rw-r--r--  runtime/oat.h                                       2
-rw-r--r--  runtime/stack.cc                                    7
-rw-r--r--  test/534-checker-bce-deoptimization/expected.txt    4
-rw-r--r--  test/534-checker-bce-deoptimization/src/Main.java  74
16 files changed, 97 insertions, 16 deletions
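
The six code generator changes below all have the same shape: because art_quick_deoptimize_from_compiled_code now sets up a save-everything frame, the deoptimize slow path no longer has to spill live registers itself, and the location summary declares an empty custom caller-save set. The following is a minimal, self-contained toy model of that trade-off, not ART code; the struct and helper names are illustrative only.

#include <bitset>
#include <cstddef>
#include <iostream>

// Toy model, not ART code: a slow path only needs to spill registers that are
// both live at the call and treated as caller-save for that call. Declaring an
// empty custom caller-save set (the SetCustomSlowPathCallerSaves(RegisterSet())
// calls in the diff) brings the spill count to zero, which is safe here because
// the runtime entrypoint's save-everything frame preserves all registers anyway.
struct SlowPathLocations {
  std::bitset<32> caller_saves_to_preserve;  // empty == "no caller-save registers"
};

static std::size_t SpillsBeforeCall(const SlowPathLocations& locations,
                                    const std::bitset<32>& live_registers) {
  return (locations.caller_saves_to_preserve & live_registers).count();
}

int main() {
  std::bitset<32> live_registers("10110");                // three live caller-save registers
  SlowPathLocations default_slow_path{live_registers};    // old behaviour: spill them
  SlowPathLocations save_everything_slow_path{};          // new behaviour: spill nothing
  std::cout << SpillsBeforeCall(default_slow_path, live_registers) << "\n";          // 3
  std::cout << SpillsBeforeCall(save_everything_slow_path, live_registers) << "\n";  // 0
  return 0;
}

Fewer emitted spills at every deoptimization site is where the boot.oat size reduction quoted in the commit message comes from.
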
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5d7b491605..2ef1802522 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -344,7 +344,6 @@ class DeoptimizationSlowPathARM : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -1532,6 +1531,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3923f526b8..ceceedd793 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -494,7 +494,6 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -3060,6 +3059,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 36bb55ab12..92e9cd9067 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -418,7 +418,6 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -3470,6 +3469,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 18d928db64..664d498b18 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -376,7 +376,6 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -2631,6 +2630,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4b3eddd819..8858def40a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -369,7 +369,6 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -1499,6 +1498,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a60c27045e..5230269730 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -388,7 +388,6 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, instruction_->GetLocations());
x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -1563,6 +1562,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index c51c336d28..a3f053b79d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1544,7 +1544,7 @@ END art_quick_deoptimize
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
+ SETUP_SAVE_EVERYTHING_FRAME r0
mov r0, r9 @ Set up args.
blx artDeoptimizeFromCompiledCode @ artDeoptimizeFromCompiledCode(Thread*)
END art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 03768af0a4..b476762307 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2529,7 +2529,7 @@ END art_quick_deoptimize
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
mov x0, xSELF // Pass thread.
bl artDeoptimizeFromCompiledCode // artDeoptimizeFromCompiledCode(Thread*)
brk 0
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 3d393f69ce..4bd1314d15 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -2094,7 +2094,7 @@ END art_quick_deoptimize
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
la $t9, artDeoptimizeFromCompiledCode
jalr $t9 # artDeoptimizeFromCompiledCode(Thread*)
# Returns caller method's frame size.
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 9774eb9f83..0bf2a35bd4 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -2138,8 +2138,8 @@ END art_quick_deoptimize
* will long jump to the upcall with a special exception of -1.
*/
.extern artDeoptimizeFromCompiledCode
-ENTRY art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ENTRY_NO_GP art_quick_deoptimize_from_compiled_code
+ SETUP_SAVE_EVERYTHING_FRAME
jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*, SP)
# Returns caller method's frame size.
move $a0, rSELF # pass Thread::current
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 67ebf50843..646a80c37d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1930,7 +1930,7 @@ END_FUNCTION art_quick_deoptimize
* will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx
+ SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
subl LITERAL(12), %esp // Align stack.
CFI_ADJUST_CFA_OFFSET(12)
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index b8057031f3..5ea58af346 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2053,7 +2053,7 @@ END_FUNCTION art_quick_deoptimize
* will long jump to the interpreter bridge.
*/
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME
// Stack should be aligned now.
movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread.
call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*)
diff --git a/runtime/oat.h b/runtime/oat.h
index 7c84fe90f9..35d0c92e84 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '8', '6', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '7', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack.cc b/runtime/stack.cc
index ec492edc4e..4678ac6e50 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -319,8 +319,11 @@ bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKin
bool StackVisitor::GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const {
const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
- // X86 float registers are 64-bit and the logic below does not apply.
- DCHECK(!is_float || kRuntimeISA != InstructionSet::kX86);
+ if (kRuntimeISA == InstructionSet::kX86 && is_float) {
+ // X86 float registers are 64-bit and each XMM register is provided as two separate
+ // 32-bit registers by the context.
+ reg = (kind == kDoubleHiVReg) ? (2 * reg + 1) : (2 * reg);
+ }
if (!IsAccessibleRegister(reg, is_float)) {
return false;
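
The runtime/stack.cc hunk above is the x86 FPU-retrieval part of the change: on 32-bit x86 the context exposes each XMM register to the stack walker as two 32-bit entries, so the low half of a double vreg is read from register index 2*reg and the high half from 2*reg+1. Below is a small self-contained sketch of that index mapping and of reassembling the 64-bit value; the slot array and helper names are illustrative stand-ins, not ART's.

#include <cstdint>
#include <cstring>
#include <iostream>

// Illustrative sketch, not ART code.
enum VRegKind { kFloatVReg, kDoubleLoVReg, kDoubleHiVReg };

// Same mapping as the stack.cc change: each XMM register occupies two
// consecutive 32-bit slots; the high half of a double sits in the odd slot.
static uint32_t X86FpuSlotIndex(uint32_t reg, VRegKind kind) {
  return (kind == kDoubleHiVReg) ? (2 * reg + 1) : (2 * reg);
}

int main() {
  // Pretend XMM1 holds the double 3.5: split it into two 32-bit context slots,
  // the way a save-everything frame would expose it to the stack walker.
  double value = 3.5;
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  uint32_t slots[16] = {};  // two 32-bit slots per XMM0..XMM7
  slots[X86FpuSlotIndex(1, kDoubleLoVReg)] = static_cast<uint32_t>(bits);
  slots[X86FpuSlotIndex(1, kDoubleHiVReg)] = static_cast<uint32_t>(bits >> 32);

  // Reading the two halves back and reassembling recovers the original double.
  uint64_t lo = slots[X86FpuSlotIndex(1, kDoubleLoVReg)];
  uint64_t hi = slots[X86FpuSlotIndex(1, kDoubleHiVReg)];
  uint64_t reassembled = (hi << 32) | lo;
  double out;
  std::memcpy(&out, &reassembled, sizeof(out));
  std::cout << out << "\n";  // prints 3.5
  return 0;
}
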
diff --git a/test/534-checker-bce-deoptimization/expected.txt b/test/534-checker-bce-deoptimization/expected.txt
index 3823a29f3f..b9a1e27550 100644
--- a/test/534-checker-bce-deoptimization/expected.txt
+++ b/test/534-checker-bce-deoptimization/expected.txt
@@ -1 +1,5 @@
+array[0]=2.5f
+array[1]=2.625f
+array[0]=3.5
+array[1]=3.625
finish
diff --git a/test/534-checker-bce-deoptimization/src/Main.java b/test/534-checker-bce-deoptimization/src/Main.java
index 8cd20f677a..c4e4cbf0b5 100644
--- a/test/534-checker-bce-deoptimization/src/Main.java
+++ b/test/534-checker-bce-deoptimization/src/Main.java
@@ -17,6 +17,8 @@
public class Main {
public static void main(String[] args) {
new Main().run();
+ testPreserveFloat();
+ testPreserveDouble();
System.out.println("finish");
}
@@ -53,5 +55,77 @@ public class Main {
b[i + 1] += c * b[i + 1];
}
}
+
+ /*
+ * Test that we correctly preserve floating point registers when we deoptimize.
+ *
+ * Note: These tests rely on the deoptimization happening before the loop,
+ * so that the loop is interpreted and fills the provided arrays. However,
+ * the BCE transformation can be modified to execute the loop as many times
+ * as the compiler can guarantee no AIOOBE and only deoptimize thereafter,
+ * just before the throwing iteration. Then the floating point registers
+ * would no longer be used after the deoptimization and another approach
+ * would be needed to test this.
+ */
+
+ static public void testPreserveFloat() {
+ float[] array = new float[2];
+ try {
+ $noinline$FloatFill(1.125f, 2.5f, array, 3);
+ throw new Error();
+ } catch (ArrayIndexOutOfBoundsException expected) {
+ System.out.println("array[0]=" + array[0] + "f");
+ System.out.println("array[1]=" + array[1] + "f");
+ }
+ }
+
+ /// CHECK-START: void Main.$noinline$FloatFill(float, float, float[], int) BCE (after)
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-NOT: Deoptimize
+
+ /// CHECK-START: void Main.$noinline$FloatFill(float, float, float[], int) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+
+ public static void $noinline$FloatFill(float f1, float f2, float[] array, int n) {
+ if (doThrow) { throw new Error(); }
+ for (int i = 0; i < n; ++i) {
+ array[i] = ((i & 1) == 1) ? f1 : f2;
+ f1 += 1.5f;
+ f2 += 2.25f;
+ }
+ }
+
+ static public void testPreserveDouble() {
+ double[] array = new double[2];
+ try {
+ $noinline$DoubleFill(2.125, 3.5, array, 3);
+ throw new Error();
+ } catch (ArrayIndexOutOfBoundsException expected) {
+ System.out.println("array[0]=" + array[0]);
+ System.out.println("array[1]=" + array[1]);
+ }
+ }
+
+ /// CHECK-START: void Main.$noinline$DoubleFill(double, double, double[], int) BCE (after)
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-DAG: Deoptimize
+ /// CHECK-NOT: Deoptimize
+
+ /// CHECK-START: void Main.$noinline$DoubleFill(double, double, double[], int) BCE (after)
+ /// CHECK-NOT: BoundsCheck
+
+ public static void $noinline$DoubleFill(double d1, double d2, double[] array, int n) {
+ if (doThrow) { throw new Error(); }
+ for (int i = 0; i < n; ++i) {
+ array[i] = ((i & 1) == 1) ? d1 : d2;
+ d1 += 1.5;
+ d2 += 2.25;
+ }
+ }
+
+ public static boolean doThrow = false;
}