JNI: Move args in registers for @FastNative.
Golem results for art-opt-cc (higher is better):
linux-ia32                        before  after
  NativeDowncallStaticFast        222.00  222.17 (+0.0751%)
  NativeDowncallStaticFast6       139.86  161.00 (+15.11%)
  NativeDowncallStaticFastRefs6   131.00  137.86 (+5.238%)
  NativeDowncallVirtualFast       211.79  217.17 (+2.543%)
  NativeDowncallVirtualFast6      137.36  150.55 (+9.599%)
  NativeDowncallVirtualFastRefs6  131.50  132.60 (+0.8382%)
linux-x64                         before  after
  NativeDowncallStaticFast        173.15  173.24 (+0.0499%)
  NativeDowncallStaticFast6       135.50  157.61 (+16.31%)
  NativeDowncallStaticFastRefs6   127.06  134.87 (+6.147%)
  NativeDowncallVirtualFast       163.67  165.83 (+1.321%)
  NativeDowncallVirtualFast6      128.18  147.35 (+14.96%)
  NativeDowncallVirtualFastRefs6  123.44  130.74 (+5.914%)
linux-armv7                       before  after
  NativeDowncallStaticFast        21.622  21.622 (0%)
  NativeDowncallStaticFast6       17.250  18.719 (+8.518%)
  NativeDowncallStaticFastRefs6   14.757  15.663 (+6.145%)
  NativeDowncallVirtualFast       21.027  21.319 (+1.388%)
  NativeDowncallVirtualFast6      17.439  18.953 (+8.680%)
  NativeDowncallVirtualFastRefs6  14.764  15.992 (+8.319%)
linux-armv8                       before  after
  NativeDowncallStaticFast        23.244  23.610 (+1.575%)
  NativeDowncallStaticFast6       18.719  21.622 (+15.50%)
  NativeDowncallStaticFastRefs6   14.757  18.491 (+20.89%)
  NativeDowncallVirtualFast       20.197  21.319 (+5.554%)
  NativeDowncallVirtualFast6      18.272  21.027 (+15.08%)
  NativeDowncallVirtualFastRefs6  13.951  16.865 (+20.89%)
(The arm64 NativeDowncallVirtualFast baseline value is unusually
low, which inflates the apparent improvement to +5.554%. As the
previous results seem to jump between 20.197 and 20.741, the
actual improvement is probably only around 2.5%.)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I2b596414458b48a758826eafc223529e9f2fe059
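
For context on the diff below: the change threads a new `refs` array through
`MoveArguments()` so @FastNative stubs can move non-reference arguments directly
between registers and spill only reference arguments (a `jobject` must point at a
stack slot holding the reference). A minimal stand-alone sketch of that contract,
using simplified hypothetical types in place of the real `ArgumentLocation` and
`FrameOffset`:

  // Simplified model (hypothetical types) of the MoveArguments(dests, srcs, refs)
  // contract: the three arrays are parallel; refs[i] == kInvalidReferenceOffset marks
  // a non-reference argument, otherwise it names the stack slot the reference is
  // spilled to so dests[i] can receive the corresponding jobject.
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  struct Loc {
    bool in_register;
    int reg;          // valid when in_register
    uint32_t offset;  // valid when !in_register
  };
  constexpr uint32_t kInvalidReferenceOffset = 0u;  // stand-in for FrameOffset(0)

  void MoveArgumentsModel(const std::vector<Loc>& dests,
                          const std::vector<Loc>& srcs,
                          const std::vector<uint32_t>& refs) {
    // Phase 1: spill reference registers so their jobject can point at the slot.
    for (size_t i = 0; i != srcs.size(); ++i) {
      if (refs[i] != kInvalidReferenceOffset && srcs[i].in_register) {
        std::printf("str r%d, [sp, #%u]\n", srcs[i].reg, refs[i]);
      }
    }
    // Phase 2: move values (or jobjects for references) to their native locations.
    for (size_t i = 0; i != srcs.size(); ++i) {
      const char* what = (refs[i] != kInvalidReferenceOffset) ? "jobject for" : "value of";
      if (dests[i].in_register) {
        std::printf("fill r%d with %s arg %zu\n", dests[i].reg, what, i);
      } else {
        std::printf("store %s arg %zu to [sp, #%u]\n", what, i, dests[i].offset);
      }
    }
  }
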
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 82c2555..e08037c 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -264,10 +264,9 @@
// 2. Call into appropriate JniMethodStart to transition out of Runnable for normal native.
// 2.1. Spill all register arguments to preserve them across the JniMethodStart call.
- if (LIKELY(!is_critical_native)) {
+ if (LIKELY(!is_critical_native && !is_fast_native)) {
// TODO: Pass these in a single call to let the assembler use multi-register stores.
// TODO: Spill native stack args straight to their stack locations (adjust SP earlier).
- // TODO: For @FastNative, move args in registers, spill only references.
mr_conv->ResetIterator(FrameOffset(current_frame_size));
for (; mr_conv->HasNext(); mr_conv->Next()) {
if (mr_conv->IsCurrentParamInRegister()) {
@@ -362,48 +361,70 @@
// 4. Make the main native call.
// 4.1. Fill arguments.
- if (UNLIKELY(is_critical_native)) {
+ if (UNLIKELY(is_critical_native || is_fast_native)) {
ArenaVector<ArgumentLocation> src_args(allocator.Adapter());
ArenaVector<ArgumentLocation> dest_args(allocator.Adapter());
- // Move the method pointer to the hidden argument register.
- dest_args.push_back(ArgumentLocation(main_jni_conv->HiddenArgumentRegister(), kRawPointerSize));
- src_args.push_back(ArgumentLocation(mr_conv->MethodRegister(), kRawPointerSize));
- // Move normal arguments to their locations.
+ ArenaVector<FrameOffset> refs(allocator.Adapter());
mr_conv->ResetIterator(FrameOffset(current_frame_size));
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- for (; mr_conv->HasNext(); mr_conv->Next(), main_jni_conv->Next()) {
- DCHECK(main_jni_conv->HasNext());
- size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
- src_args.push_back(mr_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(mr_conv->CurrentParamRegister(), size)
- : ArgumentLocation(mr_conv->CurrentParamStackOffset(), size));
- dest_args.push_back(main_jni_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(main_jni_conv->CurrentParamRegister(), size)
- : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), size));
- }
- DCHECK(!main_jni_conv->HasNext());
- __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args), ArrayRef<ArgumentLocation>(src_args));
- } else {
- if (UNLIKELY(!method_register.IsNoRegister())) {
+ constexpr FrameOffset kInvalidReferenceOffset =
+ JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset;
+ if (is_critical_native) {
+ // Move the method pointer to the hidden argument register.
+ // TODO: Pass this as the last argument, not first. Change ARM assembler
+ // not to expect all register destinations at the beginning.
+ src_args.emplace_back(mr_conv->MethodRegister(), kRawPointerSize);
+ dest_args.emplace_back(main_jni_conv->HiddenArgumentRegister(), kRawPointerSize);
+ refs.push_back(kInvalidReferenceOffset);
+ } else {
DCHECK(is_fast_native);
- // In general, we do not know if the method register shall be clobbered by initializing
- // some argument below. However, for most supported architectures (arm, arm64, x86_64),
- // the `method_register` is the same as the `JNIEnv*` argument register which is
- // initialized last, so we can quickly check that case and use the original method
- // register to initialize the `jclass` for static methods. Otherwise, move the method
- // to the `callee_save_temp` as we shall need it for the call.
- main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- if (main_jni_conv->IsCurrentParamInRegister() &&
- main_jni_conv->CurrentParamRegister().Equals(method_register) &&
- is_static) {
- // Keep the current `method_register`.
- } else {
+ DCHECK(!method_register.IsNoRegister());
+ main_jni_conv->Next(); // Skip JNIEnv*.
+ if (!is_static || !main_jni_conv->IsCurrentParamInRegister()) {
+ // The old method register can be clobbered by argument moves.
+ // Preserve the method in `callee_save_temp`.
ManagedRegister new_method_reg = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
__ Move(new_method_reg, method_register, kRawPointerSize);
method_register = new_method_reg;
}
+ if (is_static) {
+ // For static methods, move the method to the `jclass` argument.
+ DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
+ src_args.emplace_back(method_register, kRawPointerSize);
+ if (main_jni_conv->IsCurrentParamInRegister()) {
+ // The `jclass` argument becomes the new method register needed for the call.
+ method_register = main_jni_conv->CurrentParamRegister();
+ dest_args.emplace_back(method_register, kRawPointerSize);
+ } else {
+ dest_args.emplace_back(main_jni_conv->CurrentParamStackOffset(), kRawPointerSize);
+ }
+ refs.push_back(kInvalidReferenceOffset);
+ main_jni_conv->Next();
+ }
}
-
+ // Move normal arguments to their locations.
+ // Note that the `this` argument for @FastNative instance methods is passed first,
+ // so that `MoveArguments()` treats it as non-null.
+ for (; mr_conv->HasNext(); mr_conv->Next(), main_jni_conv->Next()) {
+ DCHECK(main_jni_conv->HasNext());
+ static_assert(kObjectReferenceSize == 4u);
+ bool is_reference = mr_conv->IsCurrentParamAReference();
+ size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u;
+ size_t dest_size = is_reference ? kRawPointerSize : src_size;
+ src_args.push_back(mr_conv->IsCurrentParamInRegister()
+ ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
+ : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
+ dest_args.push_back(main_jni_conv->IsCurrentParamInRegister()
+ ? ArgumentLocation(main_jni_conv->CurrentParamRegister(), dest_size)
+ : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size));
+ refs.emplace_back(
+ is_reference ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset);
+ }
+ DCHECK(!main_jni_conv->HasNext());
+ __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args),
+ ArrayRef<ArgumentLocation>(src_args),
+ ArrayRef<FrameOffset>(refs));
+ } else {
// Iterate over arguments placing values from managed calling convention in
// to the convention required for a native call (shuffling). For references
// place an index/pointer to the reference after checking whether it is
@@ -445,19 +466,17 @@
DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
FrameOffset method_offset =
FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
+ DCHECK(method_register.IsNoRegister());
if (is_static) {
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
main_jni_conv->Next(); // Skip JNIEnv*
- // Load reference to the method's declaring class. For normal native, the method register
- // has been clobbered by the above call, so we need to load the method from the stack.
- if (method_register.IsNoRegister()) {
- // Use the `callee_save_temp` if the parameter goes on the stack.
- method_register = main_jni_conv->IsCurrentParamOnStack()
- ? __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize)
- : main_jni_conv->CurrentParamRegister();
- __ Load(method_register, method_offset, kRawPointerSize);
- }
- DCHECK(!method_register.IsNoRegister());
+ // Load reference to the method's declaring class. The method register has been
+ // clobbered by the above call, so we need to load the method from the stack.
+ // Use the `callee_save_temp` if the parameter goes on the stack.
+ method_register = main_jni_conv->IsCurrentParamOnStack()
+ ? __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize)
+ : main_jni_conv->CurrentParamRegister();
+ __ Load(method_register, method_offset, kRawPointerSize);
if (main_jni_conv->IsCurrentParamOnStack()) {
// Store the method argument.
FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
@@ -467,16 +486,16 @@
__ Move(out_reg, method_register, kRawPointerSize); // No-op if equal.
method_register = out_reg;
}
- } else if (LIKELY(method_register.IsNoRegister())) {
+ } else {
// Load the method for non-static methods to `callee_save_temp` as we need it for the call.
method_register = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
__ Load(method_register, method_offset, kRawPointerSize);
}
+ }
- // Set the iterator back to the incoming Method*.
+ // 4.3. Create 1st argument, the JNI environment ptr.
+ if (LIKELY(!is_critical_native)) {
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-
- // 4.3. Create 1st argument, the JNI environment ptr.
if (main_jni_conv->IsCurrentParamInRegister()) {
ManagedRegister jni_env_arg = main_jni_conv->CurrentParamRegister();
__ Move(jni_env_arg, jni_env_reg, kRawPointerSize);
@@ -825,7 +844,6 @@
ManagedRuntimeCallingConvention* mr_conv,
JniCallingConvention* jni_conv) {
// We spilled all registers, so use stack locations.
- // TODO: Move args in registers for @CriticalNative.
bool input_in_reg = false; // mr_conv->IsCurrentParamInRegister();
bool output_in_reg = jni_conv->IsCurrentParamInRegister();
FrameOffset spilled_reference_offset(0);
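
For a concrete picture of what the @FastNative path above fills: the native ABI
arguments are still the standard JNI ones, i.e. JNIEnv* first, then the jclass
(static) or the non-null `this` jobject (instance), then the declared arguments,
with each reference passed as a jobject pointing at its spill slot. A hypothetical
example (not part of this change) of the target signature, assuming a method like
`@FastNative static native int sum(int a, long b, Object o)`:

  #include <jni.h>

  // Hypothetical native entry point the stub dispatches to; the stub now moves
  // `a` and `b` register-to-register and passes `o` as a jobject that is either
  // null or the address of the stack slot where the reference was spilled.
  extern "C" JNIEXPORT jint JNICALL
  Java_com_example_Example_sum(JNIEnv* env, jclass klass, jint a, jlong b, jobject o) {
    (void)env;
    (void)klass;
    return a + static_cast<jint>(b) + (o != nullptr ? 1 : 0);
  }
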
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2857ff4..ac263c1 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -45,6 +45,9 @@
// STRD immediate can encode any 4-byte aligned offset smaller than this cutoff.
static constexpr size_t kStrdOffsetCutoff = 1024u;
+// ADD sp, imm can encode 4-byte aligned immediate smaller than this cutoff.
+static constexpr size_t kAddSpImmCutoff = 1024u;
+
vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) {
CHECK(reg.IsCoreRegister());
return vixl::aarch32::Register(reg.RegId());
@@ -464,28 +467,11 @@
// Get the number of locations to spill together.
static inline size_t GetSpillChunkSize(ArrayRef<ArgumentLocation> dests,
ArrayRef<ArgumentLocation> srcs,
- size_t start,
- bool have_extra_temp) {
+ size_t start) {
DCHECK_LT(start, dests.size());
DCHECK_ALIGNED(dests[start].GetFrameOffset().Uint32Value(), 4u);
const ArgumentLocation& first_src = srcs[start];
- if (!first_src.IsRegister()) {
- DCHECK_ALIGNED(first_src.GetFrameOffset().Uint32Value(), 4u);
- // If we have an extra temporary, look for opportunities to move 2 words
- // at a time with LDRD/STRD when the source types are word-sized.
- if (have_extra_temp &&
- start + 1u != dests.size() &&
- !srcs[start + 1u].IsRegister() &&
- first_src.GetSize() == 4u &&
- srcs[start + 1u].GetSize() == 4u &&
- NoSpillGap(first_src, srcs[start + 1u]) &&
- NoSpillGap(dests[start], dests[start + 1u]) &&
- dests[start].GetFrameOffset().Uint32Value() < kStrdOffsetCutoff) {
- // Note: The source and destination may not be 8B aligned (but they are 4B aligned).
- return 2u;
- }
- return 1u;
- }
+ DCHECK(first_src.IsRegister());
ArmManagedRegister first_src_reg = first_src.GetRegister().AsArm();
size_t end = start + 1u;
if (IsCoreRegisterOrPair(first_src_reg)) {
@@ -555,8 +541,46 @@
}
void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
- ArrayRef<ArgumentLocation> srcs) {
- DCHECK_EQ(dests.size(), srcs.size());
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
+ // Spill reference registers. Spill two references together with STRD where possible.
+ for (size_t i = 0; i != arg_count; ++i) {
+ if (refs[i] != kInvalidReferenceOffset) {
+ DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
+ if (srcs[i].IsRegister()) {
+ DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
+ // Use STRD if we're storing 2 consecutive references within the available STRD range.
+ if (i + 1u != arg_count &&
+ refs[i + 1u] != kInvalidReferenceOffset &&
+ srcs[i + 1u].IsRegister() &&
+ refs[i].SizeValue() < kStrdOffsetCutoff) {
+ DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize);
+ DCHECK_EQ(refs[i + 1u].SizeValue(), refs[i].SizeValue() + kObjectReferenceSize);
+ ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
+ AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
+ MemOperand(sp, refs[i].SizeValue()));
+ ++i;
+ } else {
+ Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize);
+ }
+ } else {
+ DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
+ }
+ }
+ }
+
+ // Convert reference registers to `jobject` values.
+ for (size_t i = 0; i != arg_count; ++i) {
+ if (refs[i] != kInvalidReferenceOffset && srcs[i].IsRegister()) {
+ // Note: We can clobber `srcs[i]` here as the register cannot hold more than one argument.
+ ManagedRegister src_i_reg = srcs[i].GetRegister();
+ CreateJObject(src_i_reg, refs[i], src_i_reg, /*null_allowed=*/ i != 0u);
+ }
+ }
// Native ABI is soft-float, so all destinations should be core registers or stack offsets.
// And register locations should be first, followed by stack locations with increasing offset.
@@ -574,12 +598,14 @@
// Collect registers to move. No need to record FP regs as destinations are only core regs.
uint32_t src_regs = 0u;
uint32_t dest_regs = 0u;
+ uint32_t same_regs = 0u;
for (size_t i = 0; i != num_reg_dests; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
DCHECK(dest.IsRegister() && IsCoreRegisterOrPair(dest.GetRegister().AsArm()));
if (src.IsRegister() && IsCoreRegisterOrPair(src.GetRegister().AsArm())) {
if (src.GetRegister().Equals(dest.GetRegister())) {
+ same_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
continue;
}
src_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
@@ -587,85 +613,141 @@
dest_regs |= GetCoreRegisterMask(dest.GetRegister().AsArm());
}
- // Spill args first. Look for opportunities to spill multiple arguments at once.
- {
- UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
- vixl32::Register xtemp; // Extra temp register;
- if ((dest_regs & ~src_regs) != 0u) {
- xtemp = vixl32::Register(CTZ(dest_regs & ~src_regs));
- DCHECK(!temps.IsAvailable(xtemp));
+ // Spill register arguments to stack slots.
+ for (size_t i = num_reg_dests; i != arg_count; ) {
+ const ArgumentLocation& src = srcs[i];
+ if (!src.IsRegister()) {
+ ++i;
+ continue;
}
- auto move_two_words = [&](FrameOffset dest_offset, FrameOffset src_offset) {
- DCHECK(xtemp.IsValid());
- DCHECK_LT(dest_offset.Uint32Value(), kStrdOffsetCutoff);
- // VIXL macro assembler can use destination registers for loads from large offsets.
+ const ArgumentLocation& dest = dests[i];
+ DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
+ DCHECK(!dest.IsRegister());
+ uint32_t frame_offset = dest.GetFrameOffset().Uint32Value();
+ size_t chunk_size = GetSpillChunkSize(dests, srcs, i);
+ DCHECK_NE(chunk_size, 0u);
+ if (chunk_size == 1u) {
+ Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
+ } else if (UseStrdForChunk(srcs, i, chunk_size)) {
+ ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
+ AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
+ MemOperand(sp, frame_offset));
+ } else if (UseVstrForChunk(srcs, i, chunk_size)) {
+ size_t sreg = GetSRegisterNumber(src.GetRegister().AsArm());
+ DCHECK_ALIGNED(sreg, 2u);
+ ___ Vstr(vixl32::DRegister(sreg / 2u), MemOperand(sp, frame_offset));
+ } else {
UseScratchRegisterScope temps2(asm_.GetVIXLAssembler());
- vixl32::Register temp2 = temps2.Acquire();
- ___ Ldrd(xtemp, temp2, MemOperand(sp, src_offset.Uint32Value()));
- ___ Strd(xtemp, temp2, MemOperand(sp, dest_offset.Uint32Value()));
- };
- for (size_t i = num_reg_dests, arg_count = dests.size(); i != arg_count; ) {
- const ArgumentLocation& src = srcs[i];
- const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
- DCHECK(!dest.IsRegister());
- uint32_t frame_offset = dest.GetFrameOffset().Uint32Value();
- size_t chunk_size = GetSpillChunkSize(dests, srcs, i, xtemp.IsValid());
- DCHECK_NE(chunk_size, 0u);
- if (chunk_size == 1u) {
- if (src.IsRegister()) {
- Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
- } else if (dest.GetSize() == 8u && xtemp.IsValid() && frame_offset < kStrdOffsetCutoff) {
- move_two_words(dest.GetFrameOffset(), src.GetFrameOffset());
- } else {
- Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
- }
- } else if (!src.IsRegister()) {
- DCHECK_EQ(chunk_size, 2u);
- DCHECK_EQ(dest.GetSize(), 4u);
- DCHECK_EQ(dests[i + 1u].GetSize(), 4u);
- move_two_words(dest.GetFrameOffset(), src.GetFrameOffset());
- } else if (UseStrdForChunk(srcs, i, chunk_size)) {
- ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
- AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
- MemOperand(sp, frame_offset));
- } else if (UseVstrForChunk(srcs, i, chunk_size)) {
- size_t sreg = GetSRegisterNumber(src.GetRegister().AsArm());
- DCHECK_ALIGNED(sreg, 2u);
- ___ Vstr(vixl32::DRegister(sreg / 2u), MemOperand(sp, frame_offset));
+ vixl32::Register base_reg;
+ if (frame_offset == 0u) {
+ base_reg = sp;
} else {
- UseScratchRegisterScope temps2(asm_.GetVIXLAssembler());
- vixl32::Register base_reg;
- if (frame_offset == 0u) {
- base_reg = sp;
- } else {
- base_reg = temps2.Acquire();
- ___ Add(base_reg, sp, frame_offset);
- }
+ base_reg = temps2.Acquire();
+ ___ Add(base_reg, sp, frame_offset);
+ }
- ArmManagedRegister src_reg = src.GetRegister().AsArm();
- if (IsCoreRegisterOrPair(src_reg)) {
- uint32_t core_reg_mask = GetCoreRegisterMask(srcs.SubArray(i, chunk_size));
- ___ Stm(base_reg, NO_WRITE_BACK, RegisterList(core_reg_mask));
+ ArmManagedRegister src_reg = src.GetRegister().AsArm();
+ if (IsCoreRegisterOrPair(src_reg)) {
+ uint32_t core_reg_mask = GetCoreRegisterMask(srcs.SubArray(i, chunk_size));
+ ___ Stm(base_reg, NO_WRITE_BACK, RegisterList(core_reg_mask));
+ } else {
+ uint32_t start_sreg = GetSRegisterNumber(src_reg);
+ const ArgumentLocation& last_dest = dests[i + chunk_size - 1u];
+ uint32_t total_size =
+ last_dest.GetFrameOffset().Uint32Value() + last_dest.GetSize() - frame_offset;
+ if (IsAligned<2u>(start_sreg) &&
+ IsAligned<kDRegSizeInBytes>(frame_offset) &&
+ IsAligned<kDRegSizeInBytes>(total_size)) {
+ uint32_t dreg_count = total_size / kDRegSizeInBytes;
+ DRegisterList dreg_list(vixl32::DRegister(start_sreg / 2u), dreg_count);
+ ___ Vstm(F64, base_reg, NO_WRITE_BACK, dreg_list);
} else {
- uint32_t start_sreg = GetSRegisterNumber(src_reg);
- const ArgumentLocation& last_dest = dests[i + chunk_size - 1u];
- uint32_t total_size =
- last_dest.GetFrameOffset().Uint32Value() + last_dest.GetSize() - frame_offset;
- if (IsAligned<2u>(start_sreg) &&
- IsAligned<kDRegSizeInBytes>(frame_offset) &&
- IsAligned<kDRegSizeInBytes>(total_size)) {
- uint32_t dreg_count = total_size / kDRegSizeInBytes;
- DRegisterList dreg_list(vixl32::DRegister(start_sreg / 2u), dreg_count);
- ___ Vstm(F64, base_reg, NO_WRITE_BACK, dreg_list);
- } else {
- uint32_t sreg_count = total_size / kSRegSizeInBytes;
- SRegisterList sreg_list(vixl32::SRegister(start_sreg), sreg_count);
- ___ Vstm(F32, base_reg, NO_WRITE_BACK, sreg_list);
- }
+ uint32_t sreg_count = total_size / kSRegSizeInBytes;
+ SRegisterList sreg_list(vixl32::SRegister(start_sreg), sreg_count);
+ ___ Vstm(F32, base_reg, NO_WRITE_BACK, sreg_list);
}
}
- i += chunk_size;
+ }
+ i += chunk_size;
+ }
+
+ // Copy incoming stack arguments to outgoing stack arguments.
+ // Registers r0-r3 are argument registers for both managed and native ABI and r4
+ // is a scratch register in managed ABI but also a hidden argument register for
+ // @CriticalNative call. We can use these registers as temporaries for copying
+ // stack arguments as long as they do not currently hold live values.
+ // TODO: Use the callee-save scratch registers instead to avoid using calling
+ // convention knowledge in the assembler. This would require reordering the
+ // argument move with pushing the IRT frame where those registers are used.
+ uint32_t copy_temp_regs = ((1u << 5) - 1u) & ~(same_regs | src_regs);
+ if ((dest_regs & (1u << R4)) != 0) {
+ // For @CriticalNative, R4 shall hold the hidden argument but it is available
+ // for use as a temporary at this point. However, it may be the only available
+ // register, so we shall use IP as the second temporary if needed.
+ // We do not need to worry about `CreateJObject` for @CriticalNative.
+ DCHECK_NE(copy_temp_regs, 0u);
+ DCHECK(std::all_of(refs.begin(),
+ refs.end(),
+ [](FrameOffset r) { return r == kInvalidReferenceOffset; }));
+ } else {
+ // For normal native and @FastNative, R4 and at least one of R0-R3 should be
+ // available because there are only 3 destination registers R1-R3 where the
+ // source registers can be moved. The R0 shall be filled by the `JNIEnv*`
+ // argument later. We need to keep IP available for `CreateJObject()`.
+ DCHECK_GE(POPCOUNT(copy_temp_regs), 2);
+ }
+ vixl32::Register copy_temp1 = vixl32::Register(LeastSignificantBit(copy_temp_regs));
+ copy_temp_regs ^= 1u << copy_temp1.GetCode();
+ vixl32::Register copy_xtemp = (copy_temp_regs != 0u)
+ ? vixl32::Register(LeastSignificantBit(copy_temp_regs))
+ : vixl32::Register();
+ for (size_t i = num_reg_dests; i != arg_count; ++i) {
+ if (srcs[i].IsRegister()) {
+ continue;
+ }
+ FrameOffset src_offset = srcs[i].GetFrameOffset();
+ DCHECK_ALIGNED(src_offset.Uint32Value(), 4u);
+ FrameOffset dest_offset = dests[i].GetFrameOffset();
+ DCHECK_ALIGNED(dest_offset.Uint32Value(), 4u);
+ // Look for opportunities to move 2 words at a time with LDRD/STRD
+ // when the source types are word-sized.
+ if (srcs[i].GetSize() == 4u &&
+ i + 1u != arg_count &&
+ !srcs[i + 1u].IsRegister() &&
+ srcs[i + 1u].GetSize() == 4u &&
+ NoSpillGap(srcs[i], srcs[i + 1u]) &&
+ NoSpillGap(dests[i], dests[i + 1u]) &&
+ dest_offset.Uint32Value() < kStrdOffsetCutoff) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
+ ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
+ if (refs[i] != kInvalidReferenceOffset) {
+ ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
+ enum_cast<Register>(copy_temp1.GetCode()));
+ CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
+ }
+ if (refs[i + 1u] != kInvalidReferenceOffset) {
+ ArmManagedRegister m_copy_temp2 = ArmManagedRegister::FromCoreRegister(
+ enum_cast<Register>(copy_temp2.GetCode()));
+ CreateJObject(m_copy_temp2, refs[i + 1u], m_copy_temp2, /*null_allowed=*/ true);
+ }
+ ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
+ ++i;
+ } else if (dests[i].GetSize() == 8u && dest_offset.Uint32Value() < kStrdOffsetCutoff) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
+ ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
+ ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
+ } else if (refs[i] != kInvalidReferenceOffset) {
+ // Do not use the `CreateJObject()` overload for stack target as it generates
+ // worse code than explicitly using a low register temporary.
+ ___ Ldr(copy_temp1, MemOperand(sp, src_offset.Uint32Value()));
+ ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
+ enum_cast<Register>(copy_temp1.GetCode()));
+ CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
+ ___ Str(copy_temp1, MemOperand(sp, dest_offset.Uint32Value()));
+ } else {
+ Copy(dest_offset, src_offset, dests[i].GetSize());
}
}
@@ -719,6 +801,16 @@
___ Ldrd(AsVIXLRegister(dests[i].GetRegister().AsArm()),
AsVIXLRegister(dests[j].GetRegister().AsArm()),
MemOperand(sp, srcs[i].GetFrameOffset().Uint32Value()));
+ if (refs[i] != kInvalidReferenceOffset) {
+ DCHECK_EQ(refs[i], srcs[i].GetFrameOffset());
+ ManagedRegister dest_i_reg = dests[i].GetRegister();
+ CreateJObject(dest_i_reg, refs[i], dest_i_reg, /*null_allowed=*/ i != 0u);
+ }
+ if (refs[j] != kInvalidReferenceOffset) {
+ DCHECK_EQ(refs[j], srcs[j].GetFrameOffset());
+ ManagedRegister dest_j_reg = dests[j].GetRegister();
+ CreateJObject(dest_j_reg, refs[j], dest_j_reg, /*null_allowed=*/ true);
+ }
++j;
continue;
}
@@ -737,6 +829,9 @@
}
if (srcs[i].IsRegister()) {
Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
+ } else if (refs[i] != kInvalidReferenceOffset) {
+ ManagedRegister dest_i_reg = dests[i].GetRegister();
+ CreateJObject(dest_i_reg, refs[i], dest_i_reg, /*null_allowed=*/ i != 0u);
} else {
Load(dests[i].GetRegister(), srcs[i].GetFrameOffset(), dests[i].GetSize());
}
@@ -881,27 +976,27 @@
in_reg = out_reg;
}
- temps.Exclude(in_reg);
- ___ Cmp(in_reg, 0);
-
- if (asm_.ShifterOperandCanHold(ADD, spilled_reference_offset.Int32Value())) {
- if (!out_reg.Is(in_reg)) {
- ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
- 3 * vixl32::kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- ___ it(eq, 0xc);
- ___ mov(eq, out_reg, 0);
- asm_.AddConstantInIt(out_reg, sp, spilled_reference_offset.Int32Value(), ne);
+ if (out_reg.IsLow() && spilled_reference_offset.Uint32Value() < kAddSpImmCutoff) {
+ // There is a 16-bit "ADD Rd, SP, <imm>" instruction we can use in IT-block.
+ if (out_reg.Is(in_reg)) {
+ ___ Cmp(in_reg, 0);
} else {
- ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
- 2 * vixl32::kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- ___ it(ne, 0x8);
- asm_.AddConstantInIt(out_reg, sp, spilled_reference_offset.Int32Value(), ne);
+ ___ Movs(out_reg, in_reg);
}
+ ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes);
+ ___ it(ne);
+ ___ add(ne, Narrow, out_reg, sp, spilled_reference_offset.Int32Value());
} else {
- // TODO: Implement this (old arm assembler would have crashed here).
- UNIMPLEMENTED(FATAL);
+ vixl32::Register addr_reg = out_reg.Is(in_reg) ? temps.Acquire() : out_reg;
+ vixl32::Register cond_mov_src_reg = out_reg.Is(in_reg) ? addr_reg : in_reg;
+ vixl32::Condition cond = out_reg.Is(in_reg) ? ne : eq;
+ ___ Add(addr_reg, sp, spilled_reference_offset.Int32Value());
+ ___ Cmp(in_reg, 0);
+ ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes);
+ ___ it(cond);
+ ___ mov(cond, Narrow, out_reg, cond_mov_src_reg);
}
} else {
asm_.AddConstant(out_reg, sp, spilled_reference_offset.Int32Value());
@@ -920,6 +1015,7 @@
// e.g. scratch = (scratch == 0) ? 0 : (SP+spilled_reference_offset)
___ Cmp(scratch, 0);
+ // FIXME: Using 32-bit T32 instruction in IT-block is deprecated.
if (asm_.ShifterOperandCanHold(ADD, spilled_reference_offset.Int32Value())) {
ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
2 * vixl32::kMaxInstructionSizeInBytes,
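
Summary of the reworked arm path above: reference registers are spilled first
(pairing adjacent slots with STRD where the offsets allow), converted to jobject in
place, and only then are the remaining register arguments stored or shuffled;
stack-to-stack copies use r0-r4 as temporaries so IP stays free for CreateJObject().
A small sketch (simplified, hypothetical types) of the STRD pairing rule:

  #include <cstddef>
  #include <cstdint>

  constexpr uint32_t kObjectReferenceSize = 4u;
  constexpr uint32_t kStrdOffsetCutoff = 1024u;     // 4-byte aligned STRD immediate limit
  constexpr uint32_t kInvalidReferenceOffset = 0u;  // stand-in for FrameOffset(0)

  // Spill refs[i] and refs[i + 1] with a single STRD only if both are register
  // references in adjacent 4-byte slots and the first offset is STRD-encodable.
  bool CanPairReferenceSpills(const uint32_t* refs, const bool* src_in_reg,
                              size_t i, size_t arg_count) {
    return i + 1u != arg_count &&
           refs[i] != kInvalidReferenceOffset &&
           refs[i + 1u] != kInvalidReferenceOffset &&
           src_in_reg[i] && src_in_reg[i + 1u] &&
           refs[i + 1u] == refs[i] + kObjectReferenceSize &&
           refs[i] < kStrdOffsetCutoff;
  }
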
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index f4dc1d1..49f5e7c 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -94,7 +94,9 @@
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override;
// Copying routines.
- void MoveArguments(ArrayRef<ArgumentLocation> dests, ArrayRef<ArgumentLocation> srcs) override;
+ void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 57e0823..073c2f0 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -42,6 +42,9 @@
static constexpr size_t kAapcs64StackAlignment = 16u;
static_assert(kAapcs64StackAlignment == kStackAlignment);
+// STP signed offset for W-register can encode any 4-byte aligned offset smaller than this cutoff.
+static constexpr size_t kStpWOffsetCutoff = 256u;
+
Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() {
}
@@ -364,8 +367,36 @@
// Copying routines.
void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
- ArrayRef<ArgumentLocation> srcs) {
- DCHECK_EQ(dests.size(), srcs.size());
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
+ // Spill reference registers. Spill two references together with STP where possible.
+ for (size_t i = 0; i != arg_count; ++i) {
+ if (refs[i] != kInvalidReferenceOffset) {
+ DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
+ if (srcs[i].IsRegister()) {
+ // Use STP if we're storing 2 consecutive references within the available STP range.
+ if (i + 1u != arg_count &&
+ refs[i + 1u].SizeValue() == refs[i].SizeValue() + kObjectReferenceSize &&
+ srcs[i + 1u].IsRegister() &&
+ refs[i].SizeValue() < kStpWOffsetCutoff) {
+ DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize);
+ ___ Stp(reg_w(srcs[i].GetRegister().AsArm64().AsWRegister()),
+ reg_w(srcs[i + 1u].GetRegister().AsArm64().AsWRegister()),
+ MEM_OP(sp, refs[i].SizeValue()));
+ ++i;
+ } else {
+ Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize);
+ }
+ } else {
+ DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
+ }
+ }
+ }
+
auto get_mask = [](ManagedRegister reg) -> uint64_t {
Arm64ManagedRegister arm64_reg = reg.AsArm64();
if (arm64_reg.IsXRegister()) {
@@ -387,19 +418,34 @@
return (UINT64_C(1) << 32u) << fp_reg_number;
}
};
+
// Collect registers to move while storing/copying args to stack slots.
+ // Convert processed references to `jobject`.
// More than 8 core or FP reg args are very rare, so we do not optimize
// for that case by using LDP/STP.
- // TODO: LDP/STP will be useful for normal and @FastNative where we need
+ // TODO: LDP/STP will be useful for normal native methods where we need
// to spill even the leading arguments.
uint64_t src_regs = 0u;
uint64_t dest_regs = 0u;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_EQ(src.GetSize(), kObjectReferenceSize);
+ DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kArm64PointerSize));
+ } else {
+ DCHECK_EQ(src.GetSize(), dest.GetSize());
+ }
if (dest.IsRegister()) {
- if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) {
+ // Note: For references, `Equals()` returns `false` for overlapping W and X registers.
+ if (ref != kInvalidReferenceOffset &&
+ src.IsRegister() &&
+ src.GetRegister().AsArm64().AsOverlappingXRegister() ==
+ dest.GetRegister().AsArm64().AsXRegister()) {
+ // Just convert to `jobject`. No further processing is needed.
+ CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ } else if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) {
// Nothing to do.
} else {
if (src.IsRegister()) {
@@ -407,6 +453,16 @@
}
dest_regs |= get_mask(dest.GetRegister());
}
+ } else if (ref != kInvalidReferenceOffset) {
+ if (src.IsRegister()) {
+ // Note: We can clobber `src` here as the register cannot hold more than one argument.
+ ManagedRegister src_x =
+ CoreRegisterWithSize(src.GetRegister(), static_cast<size_t>(kArm64PointerSize));
+ CreateJObject(src_x, ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ Store(dest.GetFrameOffset(), src_x, dest.GetSize());
+ } else {
+ CreateJObject(dest.GetFrameOffset(), ref, /*null_allowed=*/ i != 0u);
+ }
} else {
if (src.IsRegister()) {
Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
@@ -419,9 +475,10 @@
// There should be no cycles, so this simple algorithm should make progress.
while (dest_regs != 0u) {
uint64_t old_dest_regs = dest_regs;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
+ const FrameOffset ref = refs[i];
if (!dest.IsRegister()) {
continue; // Stored in first loop above.
}
@@ -433,10 +490,19 @@
continue; // Cannot clobber this register yet.
}
if (src.IsRegister()) {
- Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
+ if (ref != kInvalidReferenceOffset) {
+ CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ } else {
+ Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
+ }
src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register.
} else {
- Load(dest.GetRegister(), src.GetFrameOffset(), dest.GetSize());
+ if (ref != kInvalidReferenceOffset) {
+ CreateJObject(
+ dest.GetRegister(), ref, ManagedRegister::NoRegister(), /*null_allowed=*/ i != 0u);
+ } else {
+ Load(dest.GetRegister(), src.GetFrameOffset(), dest.GetSize());
+ }
}
dest_regs &= ~get_mask(dest.GetRegister()); // Destination register was filled.
}
@@ -695,23 +761,22 @@
bool null_allowed) {
Arm64ManagedRegister out_reg = m_out_reg.AsArm64();
Arm64ManagedRegister in_reg = m_in_reg.AsArm64();
- // For now we only hold stale handle scope entries in x registers.
- CHECK(in_reg.IsNoRegister() || in_reg.IsXRegister()) << in_reg;
+ CHECK(in_reg.IsNoRegister() || in_reg.IsWRegister()) << in_reg;
CHECK(out_reg.IsXRegister()) << out_reg;
if (null_allowed) {
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ Register scratch = temps.AcquireX();
+
// Null values get a jobject value null. Otherwise, the jobject is
// the address of the spilled reference.
// e.g. out_reg = (in == 0) ? 0 : (SP+spilled_reference_offset)
if (in_reg.IsNoRegister()) {
- LoadWFromOffset(kLoadWord, out_reg.AsOverlappingWRegister(), SP,
- spilled_reference_offset.Int32Value());
- in_reg = out_reg;
+ in_reg = Arm64ManagedRegister::FromWRegister(out_reg.AsOverlappingWRegister());
+ LoadWFromOffset(kLoadWord, in_reg.AsWRegister(), SP, spilled_reference_offset.Int32Value());
}
- ___ Cmp(reg_w(in_reg.AsOverlappingWRegister()), 0);
- if (!out_reg.Equals(in_reg)) {
- LoadImmediate(out_reg.AsXRegister(), 0, eq);
- }
- AddConstant(out_reg.AsXRegister(), SP, spilled_reference_offset.Int32Value(), ne);
+ ___ Add(scratch, reg_x(SP), spilled_reference_offset.Int32Value());
+ ___ Cmp(reg_w(in_reg.AsWRegister()), 0);
+ ___ Csel(reg_x(out_reg.AsXRegister()), scratch, xzr, ne);
} else {
AddConstant(out_reg.AsXRegister(), SP, spilled_reference_offset.Int32Value(), al);
}
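
The arm64 CreateJObject() above now produces a branch-free ADD/CMP/CSEL sequence
instead of the old compare plus conditional add. Its computed value, written out in
plain C++ as a sketch:

  #include <cstdint>

  // out = (in == 0) ? null : sp + spilled_reference_offset
  //   Add(scratch, sp, offset);  Cmp(in_w, 0);  Csel(out_x, scratch, xzr, ne);
  inline uint64_t CreateJObjectValue(uint64_t sp, uint32_t offset, uint32_t in_ref) {
    uint64_t addr = sp + offset;
    return (in_ref != 0u) ? addr : UINT64_C(0);
  }
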
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 32f1ea9..b6e31c2 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -88,7 +88,9 @@
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override;
// Copying routines.
- void MoveArguments(ArrayRef<ArgumentLocation> dests, ArrayRef<ArgumentLocation> srcs) override;
+ void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 79ab025..b2d4dcd 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -171,13 +171,15 @@
__ Move(hidden_arg_register, method_register, 4);
__ VerifyObject(scratch_register, false);
- __ CreateJObject(scratch_register, FrameOffset(48), scratch_register, true);
- __ CreateJObject(scratch_register, FrameOffset(48), scratch_register, false);
- __ CreateJObject(method_register, FrameOffset(48), scratch_register, true);
+ // Note: `CreateJObject()` may need the scratch register IP. Test with another high register.
+ const ManagedRegister high_register = ArmManagedRegister::FromCoreRegister(R11);
+ __ CreateJObject(high_register, FrameOffset(48), high_register, true);
+ __ CreateJObject(high_register, FrameOffset(48), high_register, false);
+ __ CreateJObject(method_register, FrameOffset(48), high_register, true);
__ CreateJObject(FrameOffset(48), FrameOffset(64), true);
- __ CreateJObject(method_register, FrameOffset(0), scratch_register, true);
- __ CreateJObject(method_register, FrameOffset(1025), scratch_register, true);
- __ CreateJObject(scratch_register, FrameOffset(1025), scratch_register, true);
+ __ CreateJObject(method_register, FrameOffset(0), high_register, true);
+ __ CreateJObject(method_register, FrameOffset(1028), high_register, true);
+ __ CreateJObject(high_register, FrameOffset(1028), high_register, true);
std::unique_ptr<JNIMacroLabel> exception_slow_path = __ CreateLabel();
__ ExceptionPoll(exception_slow_path.get());
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9b5b6e2..541458b 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -52,30 +52,30 @@
" a8: 48 46 mov r0, r9\n"
" aa: cd f8 30 90 str.w r9, [sp, #48]\n"
" ae: 04 46 mov r4, r0\n"
- " b0: bc f1 00 0f cmp.w r12, #0\n"
- " b4: 18 bf it ne\n"
- " b6: 0d f1 30 0c addne.w r12, sp, #48\n"
- " ba: 0d f1 30 0c add.w r12, sp, #48\n"
- " be: bc f1 00 0f cmp.w r12, #0\n"
- " c2: 0c bf ite eq\n"
- " c4: 00 20 moveq r0, #0\n"
+ " b0: 0d f1 30 0c add.w r12, sp, #48\n"
+ " b4: bb f1 00 0f cmp.w r11, #0\n"
+ " b8: 18 bf it ne\n"
+ " ba: e3 46 movne r11, r12\n"
+ " bc: 0d f1 30 0b add.w r11, sp, #48\n"
+ " c0: 5f ea 0b 00 movs.w r0, r11\n"
+ " c4: 18 bf it ne\n"
" c6: 0c a8 addne r0, sp, #48\n"
" c8: dd f8 40 c0 ldr.w r12, [sp, #64]\n"
" cc: bc f1 00 0f cmp.w r12, #0\n"
" d0: 18 bf it ne\n"
" d2: 0d f1 40 0c addne.w r12, sp, #64\n"
" d6: cd f8 30 c0 str.w r12, [sp, #48]\n"
- " da: bc f1 00 0f cmp.w r12, #0\n"
- " de: 0c bf ite eq\n"
- " e0: 00 20 moveq r0, #0\n"
- " e2: 68 46 movne r0, sp\n"
- " e4: bc f1 00 0f cmp.w r12, #0\n"
- " e8: 0c bf ite eq\n"
- " ea: 00 20 moveq r0, #0\n"
- " ec: 0d f2 01 40 addwne r0, sp, #1025\n"
- " f0: bc f1 00 0f cmp.w r12, #0\n"
- " f4: 18 bf it ne\n"
- " f6: 0d f2 01 4c addwne r12, sp, #1025\n"
+ " da: 5f ea 0b 00 movs.w r0, r11\n"
+ " de: 18 bf it ne\n"
+ " e0: 00 a8 addne r0, sp, #0\n"
+ " e2: 0d f2 04 40 addw r0, sp, #1028\n"
+ " e6: bb f1 00 0f cmp.w r11, #0\n"
+ " ea: 08 bf it eq\n"
+ " ec: 58 46 moveq r0, r11\n"
+ " ee: 0d f2 04 4c addw r12, sp, #1028\n"
+ " f2: bb f1 00 0f cmp.w r11, #0\n"
+ " f6: 18 bf it ne\n"
+ " f8: e3 46 movne r11, r12\n"
" fa: d9 f8 8c c0 ldr.w r12, [r9, #140]\n"
" fe: bc f1 00 0f cmp.w r12, #0\n"
" 102: 71 d1 bne 0x1e8 @ imm = #226\n"
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 7f5dc2f..abb53b7 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -152,7 +152,16 @@
virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
// Copying routines
- virtual void MoveArguments(ArrayRef<ArgumentLocation> dests, ArrayRef<ArgumentLocation> srcs) = 0;
+
+ // Move arguments from `srcs` locations to `dests` locations.
+ //
+ // References shall be spilled to `refs` frame offsets (kInvalidReferenceOffset indicates
+ // a non-reference type) if they are in registers and corresponding `dests` shall be
+ // filled with `jobject` replacements. If the first argument is a reference, it is
+ // assumed to be `this` and cannot be null; all other reference arguments can be null.
+ virtual void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) = 0;
virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
@@ -276,6 +285,8 @@
emit_run_time_checks_in_debug_mode_ = value;
}
+ static constexpr FrameOffset kInvalidReferenceOffset = FrameOffset(0);
+
protected:
JNIMacroAssembler() {}
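
The null handling implied by the comment above shows up throughout the assemblers as
`null_allowed = (i != 0u)`: a reference at argument index 0 is the `this` reference
and is converted unconditionally, while any other reference gets a null check so a
null reference becomes a null jobject rather than a pointer to its spill slot. A
sketch of that rule (hypothetical helper, not part of the change):

  #include <cstddef>
  #include <cstdint>

  // jobject value for argument `i`: null stays null (except for `this` at index 0,
  // which is never null), otherwise the address of the reference's spill slot.
  inline uint64_t JObjectFor(size_t i, uint32_t managed_ref, uint64_t sp, uint32_t ref_offset) {
    bool null_allowed = (i != 0u);
    if (null_allowed && managed_ref == 0u) {
      return 0u;  // Pass a null jobject.
    }
    return sp + ref_offset;  // Pass the address of the spilled reference.
  }
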
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 2e7f23d..d0afa72 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -317,35 +317,57 @@
}
void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
- ArrayRef<ArgumentLocation> srcs) {
- DCHECK_EQ(dests.size(), srcs.size());
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
+ // Store register args to stack slots. Convert processed references to `jobject`.
bool found_hidden_arg = false;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
if (src.IsRegister()) {
if (UNLIKELY(dest.IsRegister())) {
// Native ABI has only stack arguments but we may pass one "hidden arg" in register.
CHECK(!found_hidden_arg);
found_hidden_arg = true;
+ DCHECK_EQ(ref, kInvalidReferenceOffset);
DCHECK(
!dest.GetRegister().Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));
Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
} else {
+ if (ref != kInvalidReferenceOffset) {
+ Store(ref, srcs[i].GetRegister(), kObjectReferenceSize);
+ // Note: We can clobber `src` here as the register cannot hold more than one argument.
+ // This overload of `CreateJObject()` currently does not use the scratch
+ // register ECX, so this shall not clobber another argument.
+ CreateJObject(src.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ }
Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
}
} else {
// Delay copying until we have spilled all registers, including the scratch register ECX.
}
}
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+
+ // Copy incoming stack args. Convert processed references to `jobject`.
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
if (!src.IsRegister()) {
DCHECK(!dest.IsRegister());
- Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
+ CreateJObject(dest.GetFrameOffset(), ref, /*null_allowed=*/ i != 0u);
+ } else {
+ Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
+ }
}
}
}
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 68822f8..058e040 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -86,7 +86,9 @@
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override;
// Copying routines
- void MoveArguments(ArrayRef<ArgumentLocation> dests, ArrayRef<ArgumentLocation> srcs) override;
+ void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index afed413..1425a4c 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -353,8 +353,12 @@
}
void X86_64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
- ArrayRef<ArgumentLocation> srcs) {
- DCHECK_EQ(dests.size(), srcs.size());
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
auto get_mask = [](ManagedRegister reg) -> uint32_t {
X86_64ManagedRegister x86_64_reg = reg.AsX86_64();
if (x86_64_reg.IsCpuRegister()) {
@@ -368,14 +372,32 @@
return (1u << 16u) << xmm_reg_number;
}
};
+
// Collect registers to move while storing/copying args to stack slots.
+ // Convert all register references and copied stack references to `jobject`.
uint32_t src_regs = 0u;
uint32_t dest_regs = 0u;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_EQ(src.GetSize(), kObjectReferenceSize);
+ DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kX86_64PointerSize));
+ } else {
+ DCHECK_EQ(src.GetSize(), dest.GetSize());
+ }
+ if (src.IsRegister() && ref != kInvalidReferenceOffset) {
+ Store(ref, src.GetRegister(), kObjectReferenceSize);
+ // Note: We can clobber `src` here as the register cannot hold more than one argument.
+ // This overload of `CreateJObject()` is currently implemented as "test and branch";
+ // if it was using a conditional move, it would be better to do this at move time.
+ CreateJObject(src.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ }
if (dest.IsRegister()) {
+ // Note: X86_64ManagedRegister makes no distinction between 32-bit and 64-bit core
+ // registers, so the following `Equals()` can return `true` for references; the
+ // reference has already been converted to `jobject` above.
if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) {
// Nothing to do.
} else {
@@ -387,18 +409,22 @@
} else {
if (src.IsRegister()) {
Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
+ } else if (ref != kInvalidReferenceOffset) {
+ CreateJObject(dest.GetFrameOffset(), ref, /*null_allowed=*/ i != 0u);
} else {
Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
}
}
}
- // Fill destination registers.
+
+ // Fill destination registers. Convert loaded references to `jobject`.
// There should be no cycles, so this simple algorithm should make progress.
while (dest_regs != 0u) {
uint32_t old_dest_regs = dest_regs;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
+ const FrameOffset ref = refs[i];
if (!dest.IsRegister()) {
continue; // Stored in first loop above.
}
@@ -412,6 +438,9 @@
if (src.IsRegister()) {
Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register.
+ } else if (ref != kInvalidReferenceOffset) {
+ CreateJObject(
+ dest.GetRegister(), ref, ManagedRegister::NoRegister(), /*null_allowed=*/ i != 0u);
} else {
Load(dest.GetRegister(), src.GetFrameOffset(), dest.GetSize());
}
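
Both the arm64 and x86-64 versions above fill destination registers with the same
sweep-until-done loop: a destination register is only written once it is no longer
needed as a source, and because the JNI calling conventions produce no move cycles,
each sweep is guaranteed to make progress. A stand-alone sketch (simplified to
register-to-register moves only):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct RegMove { int src_reg; int dest_reg; };

  void FillDestRegisters(const std::vector<RegMove>& moves) {
    uint64_t src_regs = 0u;
    uint64_t dest_regs = 0u;
    for (const RegMove& m : moves) {
      if (m.src_reg == m.dest_reg) {
        continue;  // Nothing to do; the real code skips these up front.
      }
      src_regs |= UINT64_C(1) << m.src_reg;
      dest_regs |= UINT64_C(1) << m.dest_reg;
    }
    while (dest_regs != 0u) {
      uint64_t old_dest_regs = dest_regs;
      for (const RegMove& m : moves) {
        if (m.src_reg == m.dest_reg) {
          continue;
        }
        uint64_t dest_mask = UINT64_C(1) << m.dest_reg;
        if ((dest_regs & dest_mask) == 0u) {
          continue;  // Already filled.
        }
        if ((src_regs & dest_mask) != 0u) {
          continue;  // Cannot clobber this register yet; it is still a pending source.
        }
        // The real code emits Move()/Load()/CreateJObject() here.
        src_regs &= ~(UINT64_C(1) << m.src_reg);  // Source may now be clobbered.
        dest_regs &= ~dest_mask;                  // Destination register was filled.
      }
      assert(old_dest_regs != dest_regs && "no move cycles expected");
      (void)old_dest_regs;
    }
  }
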
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 71c3035..3e5dfb7 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -89,7 +89,9 @@
void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override;
// Copying routines
- void MoveArguments(ArrayRef<ArgumentLocation> dests, ArrayRef<ArgumentLocation> srcs) override;
+ void MoveArguments(ArrayRef<ArgumentLocation> dests,
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;