Move caller-saves saving/restoring to ReadBarrierMarkRegX.
Instead of saving/restoring live caller-save registers
before/after the call to read barrier mark entry points
ReadBarrierMarkRegX, have these entry points save/restore
all the caller-save registers themselves (except register
rX, which contains the return value).
Also refactor the assembly code of these entry points
using macros.
* Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
119196792 bytes -> 115575920 bytes (-3.04%)
- total ARM framework Oat files size change:
100435212 bytes -> 97621188 bytes (-2.80%)
* Benchmarks (ARM64) score variations on Nexus 5X
(aosp_bullhead-userdebug build):
- RitzPerf (lower is better)
- average score difference: -2.71%
- CaffeineMark (higher is better)
- no real difference for most tests
(absolute variation lower than 1%)
- better score on the "Method" benchmark:
score variation 41253 -> 44891 (+8.82%)
Test: ART host and target (ARM, ARM64) tests.
Bug: 29506760
Bug: 12687968
Change-Id: I881bf73139a3f1c2bee9ffc6fc8c00f9a392afa6
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47e6625..5e6e175 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -80,7 +80,11 @@
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+ // Save live core and floating-point caller-save registers and
+ // update the stack mask in `locations` for registers holding object
+ // references.
virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ // Restore live core and floating-point caller-save registers.
virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
bool IsCoreRegisterSaved(int reg) const {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1aa7b54..474e9d5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -437,11 +437,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // R0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
DCHECK_NE(reg, SP);
@@ -469,8 +467,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 07b7823..cec641f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -603,11 +603,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // W0 (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
DCHECK_NE(obj_.reg(), LR);
@@ -635,8 +633,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1cc6060..93bf022 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -472,11 +472,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // EAX (if it is live), as it is clobbered by functions
- // art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
DCHECK_NE(reg, ESP);
@@ -502,8 +500,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a015893..0d85bea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -493,11 +493,9 @@
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- // Save live registers before the runtime call, and in particular
- // RDI and/or RAX (if they are live), as they are clobbered by
- // functions art_quick_read_barrier_mark_regX.
- SaveLiveRegisters(codegen, locations);
-
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
InvokeRuntimeCallingConvention calling_convention;
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
DCHECK_NE(reg, RSP);
@@ -523,8 +521,6 @@
instruction_,
instruction_->GetDexPc(),
this);
-
- RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 8f18ff3..dfa592c 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -33,7 +33,9 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses an non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -119,7 +121,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f9c34f5..34d3158 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -672,6 +672,12 @@
.endif
.endm
+// Save rReg's value to [sp, #offset].
+.macro PUSH_REG rReg, offset
+ str \rReg, [sp, #\offset] @ save rReg
+ .cfi_rel_offset \rReg, \offset
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
@@ -1752,30 +1758,83 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
* getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * `reg`, saving and restoring all caller-save registers.
+ *
+ * If `reg` is different from `r0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `reg` is used to pass the (sole) argument of this
+ * function (instead of R0);
+ * - register `reg` is used to return the result of this function
* (instead of R0);
- * - `reg` is used to return the result of this function (instead of R0);
* - R0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
- push {lr} @ save return address
- .cfi_adjust_cfa_offset 4
- .cfi_rel_offset lr, 0
- sub sp, #4 @ push padding (native calling convention 8-byte alignment)
- .cfi_adjust_cfa_offset 4
- mov r0, \reg @ pass arg1 - obj from `reg`
- bl artReadBarrierMark @ artReadBarrierMark(obj)
- mov \reg, r0 @ return result into `reg`
- add sp, #4 @ pop padding
- .cfi_adjust_cfa_offset -4
- pop {pc} @ return
+ push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers
+ .cfi_adjust_cfa_offset 32
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ .cfi_rel_offset r9, 20
+ .cfi_rel_offset r12, 24
+ .cfi_rel_offset lr, 28
+ vpush {s0-s15} @ save floating-point caller-save registers
+ .cfi_adjust_cfa_offset 64
+
+ .ifnc \reg, r0
+ mov r0, \reg @ pass arg1 - obj from `reg`
+ .endif
+ bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj)
+
+ vpop {s0-s15} @ restore floating-point registers
+ .cfi_adjust_cfa_offset -64
+ @ If `reg` is a caller-save register, save the result to its
+ @ corresponding stack slot; it will be restored by the "pop"
+ @ instruction below. Otherwise, move result into `reg`.
+ @
+ @ (Note that saving `reg` to its stack slot will overwrite the value
+ @ previously stored by the "push" instruction above. That is
+ @ alright, as in that case we know that `reg` is not a live
+ @ register, as it is used to pass the argument and return the result
+ @ of this function.)
+ .ifc \reg, r0
+ PUSH_REG r0, 0 @ copy result to r0's stack location
+ .else
+ .ifc \reg, r1
+ PUSH_REG r0, 4 @ copy result to r1's stack location
+ .else
+ .ifc \reg, r2
+ PUSH_REG r0, 8 @ copy result to r2's stack location
+ .else
+ .ifc \reg, r3
+ PUSH_REG r0, 12 @ copy result to r3's stack location
+ .else
+ .ifc \reg, r4
+ PUSH_REG r0, 16 @ copy result to r4's stack location
+ .else
+ .ifc \reg, r9
+ PUSH_REG r0, 20 @ copy result to r9's stack location
+ .else
+ .ifc \reg, r12
+ PUSH_REG r0, 24 @ copy result to r12's stack location
+ .else
+ mov \reg, r0 @ return result into `reg`
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return
END \name
.endm
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c3188b6..e30a860 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -33,7 +33,9 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses an non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
@@ -122,7 +124,7 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = artReadBarrierMark;
+ qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c893e77..6173ae7 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1253,6 +1253,22 @@
.endif
.endm
+// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
+// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
+.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
+ .ifc \xReg1, \xExclude
+ ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2
+ .else
+ .ifc \xReg2, \xExclude
+ ldr \xReg1, [sp, #\offset] // restore xReg1
+ .else
+ ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2
+ .endif
+ .endif
+ .cfi_restore \xReg1
+ .cfi_restore \xReg2
+.endm
+
/*
* Macro to insert read barrier, only used in art_quick_aput_obj.
* xDest, wDest and xObj are registers, offset is a defined literal such as
@@ -2222,56 +2238,148 @@
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
- * getting its argument and returning its result through register
- * `reg`, thus following a non-standard runtime calling convention:
- * - `reg` is used to pass the (sole) argument of this function
+ * getting its argument and returning its result through W register
+ * `wreg` (corresponding to X register `xreg`), saving and restoring
+ * all caller-save registers.
+ *
+ * If `wreg` is different from `w0`, the generated function follows a
+ * non-standard runtime calling convention:
+ * - register `wreg` is used to pass the (sole) argument of this
+ * function (instead of W0);
+ * - register `wreg` is used to return the result of this function
* (instead of W0);
- * - `reg` is used to return the result of this function (instead of W0);
* - W0 is treated like a normal (non-argument) caller-save register;
* - everything else is the same as in the standard runtime calling
- * convention (e.g. same callee-save registers).
+ * convention (e.g. standard callee-save registers are preserved).
*/
-.macro READ_BARRIER_MARK_REG name, reg
+.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
- str xLR, [sp, #-16]! // Save return address and add padding (16B align stack).
- .cfi_adjust_cfa_offset 16
- .cfi_rel_offset x30, 0
- mov w0, \reg // Pass arg1 - obj from `reg`
+ /*
+ * Allocate 46 stack slots * 8 = 368 bytes:
+ * - 20 slots for core registers X0-X19
+ * - 24 slots for floating-point registers D0-D7 and D16-D31
+ * - 1 slot for return address register XLR
+ * - 1 padding slot for 16-byte stack alignment
+ */
+ // Save all potentially live caller-save core registers.
+ stp x0, x1, [sp, #-368]!
+ .cfi_adjust_cfa_offset 368
+ .cfi_rel_offset x0, 0
+ .cfi_rel_offset x1, 8
+ stp x2, x3, [sp, #16]
+ .cfi_rel_offset x2, 16
+ .cfi_rel_offset x3, 24
+ stp x4, x5, [sp, #32]
+ .cfi_rel_offset x4, 32
+ .cfi_rel_offset x5, 40
+ stp x6, x7, [sp, #48]
+ .cfi_rel_offset x6, 48
+ .cfi_rel_offset x7, 56
+ stp x8, x9, [sp, #64]
+ .cfi_rel_offset x8, 64
+ .cfi_rel_offset x9, 72
+ stp x10, x11, [sp, #80]
+ .cfi_rel_offset x10, 80
+ .cfi_rel_offset x11, 88
+ stp x12, x13, [sp, #96]
+ .cfi_rel_offset x12, 96
+ .cfi_rel_offset x13, 104
+ stp x14, x15, [sp, #112]
+ .cfi_rel_offset x14, 112
+ .cfi_rel_offset x15, 120
+ stp x16, x17, [sp, #128]
+ .cfi_rel_offset x16, 128
+ .cfi_rel_offset x17, 136
+ stp x18, x19, [sp, #144]
+ .cfi_rel_offset x18, 144
+ .cfi_rel_offset x19, 152
+ // Save all potentially live caller-save floating-point registers.
+ stp d0, d1, [sp, #160]
+ stp d2, d3, [sp, #176]
+ stp d4, d5, [sp, #192]
+ stp d6, d7, [sp, #208]
+ stp d16, d17, [sp, #224]
+ stp d18, d19, [sp, #240]
+ stp d20, d21, [sp, #256]
+ stp d22, d23, [sp, #272]
+ stp d24, d25, [sp, #288]
+ stp d26, d27, [sp, #304]
+ stp d28, d29, [sp, #320]
+ stp d30, d31, [sp, #336]
+ // Save return address.
+ str xLR, [sp, #352]
+ .cfi_rel_offset x30, 352
+ // (sp + #360 is a padding slot)
+
+ .ifnc \wreg, w0
+ mov w0, \wreg // Pass arg1 - obj from `wreg`
+ .endif
bl artReadBarrierMark // artReadBarrierMark(obj)
- mov \reg, w0 // Return result into `reg`
- ldr xLR, [sp], #16 // Restore return address and remove padding.
+ .ifnc \wreg, w0
+ mov \wreg, w0 // Return result into `wreg`
+ .endif
+
+ // Restore core regs, except `xreg`, as `wreg` is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REGS_NE x0, x1, 0, \xreg
+ POP_REGS_NE x2, x3, 16, \xreg
+ POP_REGS_NE x4, x5, 32, \xreg
+ POP_REGS_NE x6, x7, 48, \xreg
+ POP_REGS_NE x8, x9, 64, \xreg
+ POP_REGS_NE x10, x11, 80, \xreg
+ POP_REGS_NE x12, x13, 96, \xreg
+ POP_REGS_NE x14, x15, 112, \xreg
+ POP_REGS_NE x16, x17, 128, \xreg
+ POP_REGS_NE x18, x19, 144, \xreg
+ // Restore floating-point registers.
+ ldp d0, d1, [sp, #160]
+ ldp d2, d3, [sp, #176]
+ ldp d4, d5, [sp, #192]
+ ldp d6, d7, [sp, #208]
+ ldp d16, d17, [sp, #224]
+ ldp d18, d19, [sp, #240]
+ ldp d20, d21, [sp, #256]
+ ldp d22, d23, [sp, #272]
+ ldp d24, d25, [sp, #288]
+ ldp d26, d27, [sp, #304]
+ ldp d28, d29, [sp, #320]
+ ldp d30, d31, [sp, #336]
+ // Restore return address and release the whole stack frame (including padding).
+ ldr xLR, [sp, #352]
.cfi_restore x30
- .cfi_adjust_cfa_offset -16
+ add sp, sp, #368
+ .cfi_adjust_cfa_offset -368
ret
END \name
.endm
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28
-READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 69c939e..1b9ab44 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -31,7 +31,8 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses an non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index e75fecb..77e04e7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1908,41 +1908,73 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrierMark entry point, getting input and returning
-// result through EAX (register 0), following the standard runtime
-// calling convention.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- PUSH eax // pass arg1 - obj
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function
+// `reg`, saving and restoring all caller-save registers.
+//
+// If `reg` is different from `eax`, the generated function follows a
+// non-standard runtime calling convention:
+// - register `reg` is used to pass the (sole) argument of this function
// (instead of EAX);
-// - `reg` is used to return the result of this function (instead of EAX);
+// - register `reg` is used to return the result of this function
+// (instead of EAX);
// - EAX is treated like a normal (non-argument) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+ // Save all potentially live caller-save core registers.
+ PUSH eax
+ PUSH ecx
+ PUSH edx
+ PUSH ebx
+ // 8-byte align the stack to improve (8-byte) XMM register saving and restoring,
+ // and create space for caller-save floating-point registers.
+ subl MACRO_LITERAL(4 + 8 * 8), %esp
+ CFI_ADJUST_CFA_OFFSET(4 + 8 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movsd %xmm0, 0(%esp)
+ movsd %xmm1, 8(%esp)
+ movsd %xmm2, 16(%esp)
+ movsd %xmm3, 24(%esp)
+ movsd %xmm4, 32(%esp)
+ movsd %xmm5, 40(%esp)
+ movsd %xmm6, 48(%esp)
+ movsd %xmm7, 56(%esp)
+
+ subl LITERAL(4), %esp // alignment padding
+ CFI_ADJUST_CFA_OFFSET(4)
PUSH RAW_VAR(reg) // pass arg1 - obj from `reg`
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movl %eax, REG_VAR(reg) // return result into `reg`
- addl LITERAL(12), %esp // pop argument and remove padding
- CFI_ADJUST_CFA_OFFSET(-12)
+ .ifnc RAW_VAR(reg), eax
+ movl %eax, REG_VAR(reg) // return result into `reg`
+ .endif
+ addl LITERAL(8), %esp // pop argument and remove padding
+ CFI_ADJUST_CFA_OFFSET(-8)
+
+ // Restore floating-point registers.
+ movsd 0(%esp), %xmm0
+ movsd 8(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ movsd 24(%esp), %xmm3
+ movsd 32(%esp), %xmm4
+ movsd 40(%esp), %xmm5
+ movsd 48(%esp), %xmm6
+ movsd 56(%esp), %xmm7
+ // Remove floating-point registers and padding.
+ addl MACRO_LITERAL(8 * 8 + 4), %esp
+ CFI_ADJUST_CFA_OFFSET(-(8 * 8 + 4))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE ebx, RAW_VAR(reg)
+ POP_REG_NE edx, RAW_VAR(reg)
+ POP_REG_NE ecx, RAW_VAR(reg)
+ POP_REG_NE eax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, eax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, ecx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, edx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, ebx
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 2bea3db..e6566e1 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -34,7 +34,8 @@
// Read barrier entrypoints.
// art_quick_read_barrier_mark_regX uses an non-standard calling
// convention: it expects its input in register X and returns its
-// result in that same register.
+// result in that same register, and saves and restores all
+// caller-save registers.
extern "C" mirror::Object* art_quick_read_barrier_mark_reg00(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 496e6a8..784ec39 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1815,73 +1815,93 @@
UNREACHABLE
END_FUNCTION art_nested_signal_return
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RAX (register 0), thus following a non-standard
-// runtime calling convention:
-// - RAX is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RAX is still used to return the result
-// of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg00
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq %rax, %rdi // Pass arg1 - obj from RAX.
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg00
-
-// Call the ReadBarrier::Mark routine, getting argument and returning
-// result through RDI (register 7), thus following a non-standard
-// runtime calling convention:
-// - RDI is used to return the result of this function (instead of RAX);
-// - RAX is treated like a normal (non-result) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention; in particular, RDI is still used to pass the (sole)
-// argument of this function.
-DEFINE_FUNCTION art_quick_read_barrier_mark_reg07
- SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, %rdi // Return result into RDI.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
- RESTORE_FP_CALLEE_SAVE_FRAME
- ret
-END_FUNCTION art_quick_read_barrier_mark_reg07
-
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
-// `reg`, thus following a non-standard runtime calling convention:
-// - `reg` is used to pass the (sole) argument of this function (instead
-// of RDI);
-// - `reg` is used to return the result of this function (instead of RAX);
-// - RDI is treated like a normal (non-argument) caller-save register;
-// - RAX is treated like a normal (non-result) caller-save register;
+// `reg`, saving and restoring all caller-save registers.
+//
+// The generated function follows a non-standard runtime calling
+// convention:
+// - register `reg` (which may be different from RDI) is used to pass
+// the (sole) argument of this function;
+// - register `reg` (which may be different from RAX) is used to return
+// the result of this function;
+// - if `reg` is different from `rdi`, RDI is treated like a normal
+// (non-argument) caller-save register;
+// - if `reg` is different from `rax`, RAX is treated like a normal
+// (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
-// convention (e.g. same callee-save registers).
+// convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
+ // Save all potentially live caller-save core registers.
+ PUSH rax
+ PUSH rcx
+ PUSH rdx
+ PUSH rsi
+ PUSH rdi
+ PUSH r8
+ PUSH r9
+ PUSH r10
+ PUSH r11
+ // Create space for caller-save floating-point registers.
+ subq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(12 * 8)
+ // Save all potentially live caller-save floating-point registers.
+ movq %xmm0, 0(%rsp)
+ movq %xmm1, 8(%rsp)
+ movq %xmm2, 16(%rsp)
+ movq %xmm3, 24(%rsp)
+ movq %xmm4, 32(%rsp)
+ movq %xmm5, 40(%rsp)
+ movq %xmm6, 48(%rsp)
+ movq %xmm7, 56(%rsp)
+ movq %xmm8, 64(%rsp)
+ movq %xmm9, 72(%rsp)
+ movq %xmm10, 80(%rsp)
+ movq %xmm11, 88(%rsp)
SETUP_FP_CALLEE_SAVE_FRAME
- subq LITERAL(8), %rsp // Alignment padding.
- CFI_ADJUST_CFA_OFFSET(8)
- movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+
+ .ifnc RAW_VAR(reg), rdi
+ movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
+ .endif
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
- movq %rax, REG_VAR(reg) // Return result into `reg`.
- addq LITERAL(8), %rsp // Remove padding.
- CFI_ADJUST_CFA_OFFSET(-8)
+ .ifnc RAW_VAR(reg), rax
+ movq %rax, REG_VAR(reg) // Return result into `reg`.
+ .endif
+
RESTORE_FP_CALLEE_SAVE_FRAME
+ // Restore floating-point registers.
+ movq 0(%rsp), %xmm0
+ movq 8(%rsp), %xmm1
+ movq 16(%rsp), %xmm2
+ movq 24(%rsp), %xmm3
+ movq 32(%rsp), %xmm4
+ movq 40(%rsp), %xmm5
+ movq 48(%rsp), %xmm6
+ movq 56(%rsp), %xmm7
+ movq 64(%rsp), %xmm8
+ movq 72(%rsp), %xmm9
+ movq 80(%rsp), %xmm10
+ movq 88(%rsp), %xmm11
+ // Remove floating-point registers.
+ addq MACRO_LITERAL(12 * 8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(12 * 8))
+ // Restore core regs, except `reg`, as it is used to return the
+ // result of this function (simply remove it from the stack instead).
+ POP_REG_NE r11, RAW_VAR(reg)
+ POP_REG_NE r10, RAW_VAR(reg)
+ POP_REG_NE r9, RAW_VAR(reg)
+ POP_REG_NE r8, RAW_VAR(reg)
+ POP_REG_NE rdi, RAW_VAR(reg)
+ POP_REG_NE rsi, RAW_VAR(reg)
+ POP_REG_NE rdx, RAW_VAR(reg)
+ POP_REG_NE rcx, RAW_VAR(reg)
+ POP_REG_NE rax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
-// Note: art_quick_read_barrier_mark_reg00 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
@@ -1889,7 +1909,7 @@
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
-// Note: art_quick_read_barrier_mark_reg07 is implemented above.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
diff --git a/runtime/oat.h b/runtime/oat.h
index e506e3c..9b8f545 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '8', '3', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";