Direct calls to @CriticalNative methods.
Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.
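
For context, @CriticalNative (dalvik.annotation.optimization.CriticalNative)
applies to static native methods that take and return only primitive types,
and the registered native function receives no JNIEnv* or jclass. A minimal
sketch of such a method (hypothetical class and method names):

  import dalvik.annotation.optimization.CriticalNative;

  public final class NativeMath {
    // With this change, compiled managed callers branch directly to the code
    // registered for this method (the ArtMethod JNI entrypoint) instead of
    // going through the generic JNI stub.
    // Native side takes no JNIEnv*/jclass: jint NativeMath_add(jint a, jint b)
    @CriticalNative
    public static native int add(int a, int b);
  }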
Golem results:
art-opt-cc                         x86  x86-64     arm   arm64
NativeDowncallStaticCritical    +12.5%  +62.5%  +75.9%  +41.7%
NativeDowncallStaticCritical6   +55.6%  +87.5%  +72.1%  +35.3%

art-opt                            x86  x86-64     arm   arm64
NativeDowncallStaticCritical    +28.6%  +85.6%  +76.4%  +38.4%
NativeDowncallStaticCritical6   +44.6%  +44.6%  +74.6%  +32.2%
Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 2db1390..685e1e2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -314,6 +314,12 @@
}
ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig;
+ // Make sure the test class is visibly initialized so that the RegisterNatives() below
+ // sets the JNI entrypoint rather than leaving it as null (this test pretends to be an
+ // AOT compiler and therefore the ClassLinker skips entrypoint initialization). Even
+ // if the ClassLinker initialized it with a stub, we would not want to test that here.
+ class_linker_->MakeInitializedClassesVisiblyInitialized(Thread::Current(), /*wait=*/ true);
+
if (native_fnptr != nullptr) {
JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } };
ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1))
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index d07ab98..7afa8b1 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -420,7 +420,7 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t ArmJniCallingConvention::OutArgSize() const {
+size_t ArmJniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
// Account for arguments passed through r0-r3. (No FP args, AAPCS32 is soft-float.)
@@ -440,7 +440,7 @@
}
size_t out_args_size = RoundUp(size, kAapcsStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -512,9 +512,9 @@
CHECK_GE(itr_slots_, kJniArgumentRegisterCount);
size_t offset =
displacement_.Int32Value()
- - OutArgSize()
+ - OutFrameSize()
+ ((itr_slots_ - kJniArgumentRegisterCount) * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -537,7 +537,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool ArmJniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 7896d64..38f7184 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -65,7 +65,7 @@
// JNI calling convention
void Next() override; // Override default behavior for AAPCS
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 32da141..06796c1 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -265,20 +265,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t Arm64JniCallingConvention::OutArgSize() const {
+size_t Arm64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through v0-v7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64.
static_assert((kCoreCalleeSpillMask & ~kAapcs64CoreCalleeSpillMask) == 0u);
@@ -291,7 +285,7 @@
}
size_t out_args_size = RoundUp(size, kAapcs64StackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -355,8 +349,8 @@
static_cast<size_t>(itr_float_and_doubles_))
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -378,7 +372,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool Arm64JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm64
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 7beca08..d381d9d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index b4396f0..005ae91 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -303,9 +303,9 @@
// always at the bottom of a frame, but this doesn't work for outgoing
// native args). Includes alignment.
virtual size_t FrameSize() const = 0;
- // Size of outgoing arguments (stack portion), including alignment.
+ // Size of outgoing frame, i.e. stack arguments, @CriticalNative return PC if needed, alignment.
// -- Arguments that are passed via registers are excluded from this size.
- virtual size_t OutArgSize() const = 0;
+ virtual size_t OutFrameSize() const = 0;
// Number of references in stack indirect reference table
size_t ReferenceCount() const;
// Location where the segment state of the local indirect reference table is saved
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 036cdbb..913a3ba 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -220,7 +220,7 @@
// 1. Build the frame saving all callee saves, Method*, and PC return address.
// For @CriticalNative, this includes space for out args, otherwise just the managed frame.
const size_t managed_frame_size = main_jni_conv->FrameSize();
- const size_t main_out_arg_size = main_jni_conv->OutArgSize();
+ const size_t main_out_arg_size = main_jni_conv->OutFrameSize();
size_t current_frame_size = is_critical_native ? main_out_arg_size : managed_frame_size;
ManagedRegister method_register =
is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
@@ -582,7 +582,7 @@
if (LIKELY(!is_critical_native)) {
// Increase frame size for out args if needed by the end_jni_conv.
- const size_t end_out_arg_size = end_jni_conv->OutArgSize();
+ const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
if (end_out_arg_size > current_out_arg_size) {
size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
current_out_arg_size = end_out_arg_size;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 6776f12..df45627 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -220,11 +220,10 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86JniCallingConvention::OutArgSize() const {
- // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
- size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
- // The size of outgoiong arguments.
- size_t size = all_args * kFramePointerSize;
+size_t X86JniCallingConvention::OutFrameSize() const {
+ // The size of outgoing arguments.
+ size_t size = GetNativeOutArgsSize(/*num_args=*/ NumberOfExtraArgumentsForJni() + NumArgs(),
+ NumLongOrDoubleArgs());
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS.
static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
@@ -244,14 +243,16 @@
if (return_type_ok && size == kFramePointerSize) {
// Note: This is not aligned to kNativeStackAlignment but that's OK for tail call.
static_assert(kFramePointerSize < kNativeStackAlignment);
- DCHECK_EQ(kFramePointerSize, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ // The stub frame size is considered 0 in the callee where the return PC is a part of
+ // the callee frame but it is kPointerSize in the compiled stub before the tail call.
+ DCHECK_EQ(0u, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
return kFramePointerSize;
}
}
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -279,7 +280,8 @@
}
FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
- return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize));
+ return
+ FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize));
}
ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
@@ -295,7 +297,7 @@
bool X86JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == kFramePointerSize;
+ return OutFrameSize() == kFramePointerSize;
}
} // namespace x86
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 6f22c2b..81f617d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -61,7 +61,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index e97cab8..44ae8be 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -208,21 +208,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86_64JniCallingConvention::OutArgSize() const {
+size_t X86_64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through Xmm0..Xmm7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- static_assert(kFramePointerSize == kMmxSpillSize);
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
if (UNLIKELY(IsCriticalNative())) {
// We always need to spill xmm12-xmm15 as they are managed callee-saves
@@ -239,7 +232,7 @@
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -297,8 +290,8 @@
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
// Integer arguments passed through GPR
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index d043a3e..5bde766 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index cfd9ea6..f74a938 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -32,6 +32,7 @@
#include "code_generator_x86_64.h"
#endif
+#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "base/casts.h"
@@ -503,23 +504,69 @@
if (invoke->IsInvokeStaticOrDirect()) {
HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
- switch (call->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
- break;
- case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
- locations->AddTemp(visitor->GetMethodLocation());
- locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
- break;
- default:
- locations->AddTemp(visitor->GetMethodLocation());
- break;
+ HInvokeStaticOrDirect::MethodLoadKind method_load_kind = call->GetMethodLoadKind();
+ HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = call->GetCodePtrLocation();
+ if (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ locations->AddTemp(Location::RequiresRegister()); // For target method.
+ }
+ if (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative ||
+ method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kRecursive) {
+ // For `kCallCriticalNative` we need the current method as the hidden argument
+ // if we reach the dlsym lookup stub for @CriticalNative.
+ locations->SetInAt(call->GetCurrentMethodIndex(), visitor->GetMethodLocation());
+ } else {
+ locations->AddTemp(visitor->GetMethodLocation());
+ if (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall) {
+ locations->SetInAt(call->GetCurrentMethodIndex(), Location::RequiresRegister());
+ }
}
} else if (!invoke->IsInvokePolymorphic()) {
locations->AddTemp(visitor->GetMethodLocation());
}
}
+void CodeGenerator::PrepareCriticalNativeArgumentMoves(
+ HInvokeStaticOrDirect* invoke,
+ /*inout*/InvokeDexCallingConventionVisitor* visitor,
+ /*out*/HParallelMove* parallel_move) {
+ LocationSummary* locations = invoke->GetLocations();
+ for (size_t i = 0, num = invoke->GetNumberOfArguments(); i != num; ++i) {
+ Location in_location = locations->InAt(i);
+ DataType::Type type = invoke->InputAt(i)->GetType();
+ DCHECK_NE(type, DataType::Type::kReference);
+ Location out_location = visitor->GetNextLocation(type);
+ if (out_location.IsStackSlot() || out_location.IsDoubleStackSlot()) {
+ // Stack arguments will need to be moved after adjusting the SP.
+ parallel_move->AddMove(in_location, out_location, type, /*instruction=*/ nullptr);
+ } else {
+ // Register arguments should have been assigned their final locations for register allocation.
+ DCHECK(out_location.Equals(in_location)) << in_location << " -> " << out_location;
+ }
+ }
+}
+
+void CodeGenerator::AdjustCriticalNativeArgumentMoves(size_t out_frame_size,
+ /*inout*/HParallelMove* parallel_move) {
+ // Adjust the source stack offsets by `out_frame_size`, i.e. the additional
+ // frame size needed for outgoing stack arguments.
+ for (size_t i = 0, num = parallel_move->NumMoves(); i != num; ++i) {
+ MoveOperands* operands = parallel_move->MoveOperandsAt(i);
+ Location source = operands->GetSource();
+ if (operands->GetSource().IsStackSlot()) {
+ operands->SetSource(Location::StackSlot(source.GetStackIndex() + out_frame_size));
+ } else if (operands->GetSource().IsDoubleStackSlot()) {
+ operands->SetSource(Location::DoubleStackSlot(source.GetStackIndex() + out_frame_size));
+ }
+ }
+}
+
+const char* CodeGenerator::GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke,
+ uint32_t* shorty_len) {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(invoke->GetResolvedMethod()->IsCriticalNative());
+ return invoke->GetResolvedMethod()->GetShorty(shorty_len);
+}
+
void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
MoveConstant(temp, invoke->GetDexMethodIndex());
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ff2be47..4bfc14a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -570,6 +570,28 @@
static void CreateCommonInvokeLocationSummary(
HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
+ template <typename CriticalNativeCallingConventionVisitor,
+ size_t kNativeStackAlignment,
+ size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)>
+ static size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke,
+ /*out*/HParallelMove* parallel_move) {
+ DCHECK(!invoke->GetLocations()->Intrinsified());
+ CriticalNativeCallingConventionVisitor calling_convention_visitor(
+ /*for_register_allocation=*/ false);
+ PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, parallel_move);
+ size_t out_frame_size =
+ RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment);
+ if (kIsDebugBuild) {
+ uint32_t shorty_len;
+ const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
+ DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
+ }
+ if (out_frame_size != 0u) {
+ AdjustCriticalNativeArgumentMoves(out_frame_size, parallel_move);
+ }
+ return out_frame_size;
+ }
+
void GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
@@ -799,6 +821,16 @@
bool needs_vreg_info = true);
void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
+ static void PrepareCriticalNativeArgumentMoves(
+ HInvokeStaticOrDirect* invoke,
+ /*inout*/InvokeDexCallingConventionVisitor* visitor,
+ /*out*/HParallelMove* parallel_move);
+
+ static void AdjustCriticalNativeArgumentMoves(size_t out_frame_size,
+ /*inout*/HParallelMove* parallel_move);
+
+ static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);
+
OptimizingCompilerStats* stats_;
HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4a618de..d108623 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -18,6 +18,7 @@
#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/arm64/jni_frame_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
@@ -870,6 +871,49 @@
return LocationFrom(kArtMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFPRegistersLength) {
+ location = LocationFrom(kParameterFPRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register x0
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
+ // Pass the method in the hidden argument x15.
+ return Location::RegisterLocation(x15.GetCode());
+}
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -4295,7 +4339,13 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
@@ -4327,7 +4377,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -4373,6 +4423,19 @@
}
}
+ auto call_code_pointer_member = [&](MemberOffset offset) {
+ // LR = callee_method->member;
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ }
+ };
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
{
@@ -4384,20 +4447,50 @@
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, MemOperand(
- XRegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- // lr()
- __ blr(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
+ kAapcs64StackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ Claim(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ Ubfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kInt8:
+ __ Sbfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kUint16:
+ __ Ubfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt16:
+ __ Sbfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ Drop(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
break;
}
@@ -4819,14 +4912,9 @@
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- LocationSummary* locations = invoke->GetLocations();
- codegen_->GenerateStaticOrDirectCall(
- invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
- }
+ LocationSummary* locations = invoke->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 487d091..bebf43d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -231,6 +231,31 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
};
+class CriticalNativeCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorARM64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorARM64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorARM64);
+};
+
class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionARM64() {}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1d8fd6c..9916257 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -18,6 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm/jni_frame_arm.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
@@ -2435,6 +2436,54 @@
return LocationFrom(kMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ // Native ABI uses the same registers as managed, except that the method register r0
+ // is a normal argument.
+ Location location = Location::NoLocation();
+ if (DataType::Is64BitType(type)) {
+ gpr_index_ = RoundUp(gpr_index_, 2u);
+ stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
+ if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
+ location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
+ kParameterCoreRegistersVIXL[gpr_index_]);
+ gpr_index_ += 2u;
+ }
+ } else {
+ if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
+ location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ stack_offset_ += 2 * kFramePointerSize;
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ stack_offset_ += kFramePointerSize;
+ }
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
+ const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
+ // Pass the method in the hidden argument R4.
+ return Location::RegisterLocation(R4);
+}
+
void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
if (source.Equals(destination)) {
return;
@@ -3294,7 +3343,13 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -8856,35 +8911,35 @@
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ ArtMethod* method) {
+ if (desired_dispatch_info.code_ptr_location ==
+ HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ // TODO: Work around CheckTypeConsistency() in code_generator.cc that does not allow
+ // putting FP values in core registers as we need to do for the soft-float native ABI.
+ ScopedObjectAccess soa(Thread::Current());
+ uint32_t shorty_len;
+ const char* shorty = method->GetShorty(&shorty_len);
+ size_t reg = 0u;
+ for (uint32_t i = 1; i != shorty_len; ++i) {
+ size_t next_reg = reg + 1u;
+ if (shorty[i] == 'D' || shorty[i] == 'J') {
+ reg = RoundUp(reg, 2u);
+ next_reg = reg + 2u;
+ }
+ if (reg == 4u) {
+ break;
+ }
+ if (shorty[i] == 'D' || shorty[i] == 'F') {
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+ dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ return dispatch_info;
+ }
+ reg = next_reg;
+ }
+ }
return desired_dispatch_info;
}
-vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
- HInvokeStaticOrDirect* invoke, vixl32::Register temp) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
- Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
- if (!invoke->GetLocations()->Intrinsified()) {
- return RegisterFrom(location);
- }
- // For intrinsics we allow any location, so it may be on the stack.
- if (!location.IsRegister()) {
- GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex());
- return temp;
- }
- // For register locations, check if the register was saved. If so, get it from the stack.
- // Note: There is a chance that the register was saved but not overwritten, so we could
- // save one load. However, since this is just an intrinsic slow path we prefer this
- // simple and more robust approach rather that trying to determine if that's the case.
- SlowPathCode* slow_path = GetCurrentSlowPath();
- if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
- int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
- GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
- return temp;
- }
- return RegisterFrom(location);
-}
-
void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
@@ -8897,7 +8952,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -8932,6 +8987,20 @@
}
}
+ auto call_code_pointer_member = [&](MemberOffset offset) {
+ // LR = callee_method->member;
+ GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
+ {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+ // LR()
+ __ blx(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ }
+ };
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
{
@@ -8943,23 +9012,46 @@
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_
- GetAssembler()->LoadFromOffset(
- kLoadWord,
- lr,
- RegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
- ExactAssemblyScope aas(GetVIXLAssembler(),
- vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
- // LR()
- __ blx(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
+ kAapcsStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ Claim(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
+ // Move the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kFloat32:
+ __ Vmov(s0, r0);
+ break;
+ case DataType::Type::kFloat64:
+ __ Vmov(d0, r0, r1);
+ break;
+ case DataType::Type::kBool:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ Drop(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
break;
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3eed730..d6300c7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -187,6 +187,30 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARMVIXL);
};
+class CriticalNativeCallingConventionVisitorARMVIXL : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorARMVIXL(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorARMVIXL() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorARMVIXL);
+};
+
class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionARMVIXL() {}
@@ -853,9 +877,6 @@
uint32_t encoded_data,
/*out*/ std::string* debug_name);
- vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
- vixl::aarch32::Register temp);
-
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
VIXLUInt32Literal*,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e9ef21a..595b31e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
+#include "arch/x86/jni_frame_x86.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
@@ -1300,6 +1301,34 @@
return Location::NoLocation();
}
+Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location;
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ stack_offset_ += 2 * kFramePointerSize;
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ stack_offset_ += kFramePointerSize;
+ }
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorX86 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
+ // Pass the method in the hidden argument EAX.
+ return Location::RegisterLocation(EAX);
+}
+
void CodeGeneratorX86::Move32(Location destination, Location source) {
if (source.Equals(destination)) {
return;
@@ -1374,11 +1403,13 @@
size_t elem_size = DataType::Size(DataType::Type::kInt32);
// Create stack space for 2 elements.
__ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
__ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
__ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
__ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
} else {
LOG(FATAL) << "Unimplemented";
}
@@ -2286,9 +2317,15 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
- // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+ // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
if (invoke->HasPcRelativeMethodLoadKind()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
}
@@ -2989,6 +3026,7 @@
if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
adjustment = DataType::Size(DataType::Type::kInt64);
__ subl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(adjustment);
}
// Load the value to the FP stack, using temporaries if needed.
@@ -3005,6 +3043,7 @@
// Remove the temporary stack space we allocated.
if (adjustment != 0) {
__ addl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(-adjustment);
}
break;
}
@@ -3039,6 +3078,7 @@
if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
adjustment = DataType::Size(DataType::Type::kInt64);
__ subl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(adjustment);
}
// Load the value to the FP stack, using temporaries if needed.
@@ -3055,6 +3095,7 @@
// Remove the temporary stack space we allocated.
if (adjustment != 0) {
__ addl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(-adjustment);
}
break;
}
@@ -3551,6 +3592,7 @@
// Create stack space for 2 elements.
// TODO: enhance register allocator to ask for stack temporaries.
__ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
// Load the values to the FP stack in reverse order, using temporaries if needed.
const bool is_wide = !is_float;
@@ -3591,6 +3633,7 @@
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
}
@@ -4934,7 +4977,6 @@
Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
Register temp) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
if (!invoke->GetLocations()->Intrinsified()) {
return location.AsRegister<Register>();
@@ -4970,7 +5012,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -5009,15 +5051,73 @@
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
__ call(GetFrameEntryLabel());
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ subl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+ // (callee_method + offset_of_jni_entry_point)()
+ __ call(Address(callee_method.AsRegister<Register>(),
+ ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
+ // Create space for conversion.
+ out_frame_size = 8u;
+ __ subl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ }
+ // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ movzxb(EAX, AL);
+ break;
+ case DataType::Type::kInt8:
+ __ movsxb(EAX, AL);
+ break;
+ case DataType::Type::kUint16:
+ __ movzxw(EAX, EAX);
+ break;
+ case DataType::Type::kInt16:
+ __ movsxw(EAX, EAX);
+ break;
+ case DataType::Type::kFloat32:
+ __ fstps(Address(ESP, 0));
+ __ movss(XMM0, Address(ESP, 0));
+ break;
+ case DataType::Type::kFloat64:
+ __ fstpl(Address(ESP, 0));
+ __ movsd(XMM0, Address(ESP, 0));
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ addl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<Register>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86PointerSize).Int32Value()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
}
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
DCHECK(!IsLeafMethod());
}
@@ -5072,7 +5172,6 @@
}
void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
boot_image_method_patches_.emplace_back(
@@ -5081,7 +5180,6 @@
}
void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
// Add the patch entry and bind its label at the end of the instruction.
@@ -5126,7 +5224,6 @@
uint32_t boot_image_reference,
HInvokeStaticOrDirect* invoke) {
if (GetCompilerOptions().IsBootImage()) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -5135,7 +5232,6 @@
__ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
} else if (GetCompilerOptions().GetCompilePic()) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -5160,7 +5256,6 @@
if (GetCompilerOptions().IsBootImage()) {
DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
// Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -6365,24 +6460,45 @@
__ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
}
} else if (source.IsRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
+ DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
+ __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+ } else if (destination.IsFpuRegister()) {
size_t elem_size = DataType::Size(DataType::Type::kInt32);
- // Create stack space for 2 elements.
- __ subl(ESP, Immediate(2 * elem_size));
- __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
- __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+ // Push the 2 source registers to stack.
+ __ pushl(source.AsRegisterPairHigh<Register>());
+ __ cfi().AdjustCFAOffset(elem_size);
+ __ pushl(source.AsRegisterPairLow<Register>());
+ __ cfi().AdjustCFAOffset(elem_size);
+ // Load the destination register.
__ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
+ source.AsRegisterPairHigh<Register>());
+ }
} else if (source.IsFpuRegister()) {
if (destination.IsRegister()) {
__ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsFpuRegister()) {
__ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsRegisterPair()) {
- XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
- __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
- __ psrlq(src_reg, Immediate(32));
- __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
+ size_t elem_size = DataType::Size(DataType::Type::kInt32);
+ // Create stack space for 2 elements.
+ __ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
+ // Store the source register.
+ __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
+ // And pop the values into destination registers.
+ __ popl(destination.AsRegisterPairLow<Register>());
+ __ cfi().AdjustCFAOffset(-elem_size);
+ __ popl(destination.AsRegisterPairHigh<Register>());
+ __ cfi().AdjustCFAOffset(-elem_size);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsDoubleStackSlot()) {
@@ -6480,9 +6596,12 @@
__ xorpd(dest, dest);
} else {
__ pushl(high);
+ __ cfi().AdjustCFAOffset(4);
__ pushl(low);
+ __ cfi().AdjustCFAOffset(4);
__ movsd(dest, Address(ESP, 0));
__ addl(ESP, Immediate(8));
+ __ cfi().AdjustCFAOffset(-8);
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
@@ -6520,10 +6639,12 @@
void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
size_t extra_slot = 4 * kX86WordSize;
__ subl(ESP, Immediate(extra_slot));
+ __ cfi().AdjustCFAOffset(extra_slot);
__ movups(Address(ESP, 0), XmmRegister(reg));
ExchangeMemory(0, mem + extra_slot, 4);
__ movups(XmmRegister(reg), Address(ESP, 0));
__ addl(ESP, Immediate(extra_slot));
+ __ cfi().AdjustCFAOffset(-extra_slot);
}
void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 43f5acd..22d8778 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -93,6 +93,29 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};
+class CriticalNativeCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorX86(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorX86() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86);
+};
+
class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86() {}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ec54376..4a0cc78 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86_64.h"
+#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
@@ -978,6 +979,16 @@
UNREACHABLE();
}
+void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
+ // We have to ensure that the native code we call directly (such as @CriticalNative
+ // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
+ // which are non-volatile for ART, but volatile for Native calls. This will ensure
+ // that they are saved in the prologue and properly restored.
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
+ locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+ }
+}
+
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -998,7 +1009,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -1032,15 +1043,61 @@
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
__ call(&frame_entry_label_);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ subq(CpuRegister(RSP), Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+ // (callee_method + offset_of_jni_entry_point)()
+ __ call(Address(callee_method.AsRegister<CpuRegister>(),
+ ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt8:
+ __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kUint16:
+ __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt16:
+ __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ addq(CpuRegister(RSP), Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<CpuRegister>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86_64PointerSize).SizeValue()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
}
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
DCHECK(!IsLeafMethod());
}
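
Note on the zero-/sign-extension block above: the native ABI can leave the upper bits of a narrow integer return value undefined, so the compiler normalizes RAX right after the direct call. A minimal stand-alone sketch of the same fixups (NarrowType and NormalizeReturn are illustrative names, not ART code):

#include <cstdint>

// Illustrative stand-in for the DataType::Type cases that need fixing.
enum class NarrowType { kBool, kInt8, kUint16, kInt16, kOther };

// Mirrors the movzx/movsx instructions emitted after the kCallCriticalNative call:
// normalize the raw 64-bit value left in RAX to the managed representation.
int64_t NormalizeReturn(NarrowType type, int64_t raw_rax) {
  switch (type) {
    case NarrowType::kBool:   return static_cast<uint8_t>(raw_rax);   // movzxb
    case NarrowType::kInt8:   return static_cast<int8_t>(raw_rax);    // movsxb
    case NarrowType::kUint16: return static_cast<uint16_t>(raw_rax);  // movzxw
    case NarrowType::kInt16:  return static_cast<int16_t>(raw_rax);   // movsxw
    default:                  return raw_rax;  // 32/64-bit, FP and void need no fixup.
  }
}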
@@ -2493,6 +2550,51 @@
return Location::NoLocation();
}
+Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFloatRegistersLength) {
+ location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register RDI
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = Location::RegisterLocation(
+ gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
+ const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
+ // Pass the method in the hidden argument RAX.
+ return Location::RegisterLocation(RAX);
+}
+
void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
// The trampoline uses the same calling convention as dex calling conventions,
// except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
@@ -2514,7 +2616,14 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
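
Rough intuition for CriticalNativeCallingConventionVisitorX86_64::GetNextLocation() above: with no JNIEnv*/jclass for @CriticalNative, RDI becomes a regular argument register ahead of the managed argument registers, FP args take XMM0-XMM7, and anything left over gets an 8-byte stack slot (reported as Location::Any() to the register allocator, since the final SP-relative slot is only known when the call is emitted). A toy model under those assumptions, not ART code:

#include <cstddef>
#include <string>

// Toy re-implementation of the assignment order; register names follow SysV x86-64.
struct ToyCriticalNativeConvX86_64 {
  size_t gpr = 0, fpr = 0, stack_offset = 0;

  std::string Next(bool is_fp) {
    static const char* const kGprs[] = {"RDI", "RSI", "RDX", "RCX", "R8", "R9"};
    static const char* const kFprs[] = {"XMM0", "XMM1", "XMM2", "XMM3",
                                        "XMM4", "XMM5", "XMM6", "XMM7"};
    if (is_fp && fpr < 8) return kFprs[fpr++];
    if (!is_fp && gpr < 6) return kGprs[gpr++];
    std::string slot = "[rsp + " + std::to_string(stack_offset) + "]";
    stack_offset += 8;  // Both 32- and 64-bit args take one 8-byte slot here.
    return slot;
  }
};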
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 01810f4..dcdd632 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -79,6 +79,31 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
+class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorX86_64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
+};
+
class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86_64() {}
@@ -609,6 +634,8 @@
void MaybeIncrementHotness(bool is_frame_entry);
+ static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index cd68b2a..60e1279 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1530,8 +1530,8 @@
if (invoke->IsInvokeStaticOrDirect() &&
HInvokeStaticOrDirect::NeedsCurrentMethodInput(
- invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
- DCHECK_EQ(argument_index, invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex());
+ invoke->AsInvokeStaticOrDirect()->GetDispatchInfo())) {
+ DCHECK_EQ(argument_index, invoke->AsInvokeStaticOrDirect()->GetCurrentMethodIndex());
DCHECK(invoke->InputAt(argument_index) == nullptr);
invoke->SetRawInputAt(argument_index, graph_->GetCurrentMethod());
}
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index e24d541..29f815c 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -59,7 +59,12 @@
Location method_loc = MoveArguments(codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
- codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke_->AsInvokeStaticOrDirect();
+ DCHECK_NE(invoke_static_or_direct->GetMethodLoadKind(),
+ HInvokeStaticOrDirect::MethodLoadKind::kRecursive);
+ DCHECK_NE(invoke_static_or_direct->GetCodePtrLocation(),
+ HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative);
+ codegen->GenerateStaticOrDirectCall(invoke_static_or_direct, method_loc, this);
} else {
codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7a0f131..af3fd76 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -398,12 +398,7 @@
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
- // We have to ensure that the native code doesn't clobber the XMM registers which are
- // non-volatile for ART, but volatile for Native calls. This will ensure that they are
- // saved in the prologue and properly restored.
- for (FloatRegister fp_reg : non_volatile_xmm_regs) {
- locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
- }
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}
static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
@@ -535,12 +530,7 @@
locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
- // We have to ensure that the native code doesn't clobber the XMM registers which are
- // non-volatile for ART, but volatile for Native calls. This will ensure that they are
- // saved in the prologue and properly restored.
- for (FloatRegister fp_reg : non_volatile_xmm_regs) {
- locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
- }
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e562b87..0eece84 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4604,6 +4604,11 @@
// Recursive call, use local PC-relative call instruction.
kCallSelf,
+ // Use native pointer from the ArtMethod*.
+ // Used for @CriticalNative to avoid going through the compiled stub. This call goes through
+ // a special resolution stub if the class is not initialized or no native code is registered.
+ kCallCriticalNative,
+
// Use code pointer from the ArtMethod*.
// Used when we don't know the target code. This is also the last-resort-kind used when
// other kinds are unimplemented or impractical (i.e. slow) on a particular architecture.
@@ -4633,9 +4638,9 @@
: HInvoke(kInvokeStaticOrDirect,
allocator,
number_of_arguments,
- // There is potentially one extra argument for the HCurrentMethod node, and
- // potentially one other if the clinit check is explicit.
- (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+ // There is potentially one extra argument for the HCurrentMethod input,
+ // and one other if the clinit check is explicit. These can be removed later.
+ (NeedsCurrentMethodInput(dispatch_info) ? 1u : 0u) +
(clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
return_type,
dex_pc,
@@ -4649,17 +4654,17 @@
bool IsClonable() const override { return true; }
- void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+ void SetDispatchInfo(DispatchInfo dispatch_info) {
bool had_current_method_input = HasCurrentMethodInput();
- bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+ bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info);
// Using the current method is the default and once we find a better
// method load kind, we should not go back to using the current method.
DCHECK(had_current_method_input || !needs_current_method_input);
if (had_current_method_input && !needs_current_method_input) {
- DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
- RemoveInputAt(GetSpecialInputIndex());
+ DCHECK_EQ(InputAt(GetCurrentMethodIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+ RemoveInputAt(GetCurrentMethodIndex());
}
dispatch_info_ = dispatch_info;
}
@@ -4668,14 +4673,6 @@
return dispatch_info_;
}
- void AddSpecialInput(HInstruction* input) {
- // We allow only one special input.
- DCHECK(!IsStringInit() && !HasCurrentMethodInput());
- DCHECK(InputCount() == GetSpecialInputIndex() ||
- (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
- InsertInputAt(GetSpecialInputIndex(), input);
- }
-
using HInstruction::GetInputRecords; // Keep the const version visible.
ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override {
ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
@@ -4696,7 +4693,7 @@
}
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
- // We access the method via the dex cache so we can't do an implicit null check.
+ // We do not access the method via object reference, so we cannot do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
}
@@ -4705,14 +4702,6 @@
return GetType() == DataType::Type::kReference && !IsStringInit();
}
- // Get the index of the special input, if any.
- //
- // If the invoke HasCurrentMethodInput(), the "special input" is the current
- // method pointer; otherwise there may be one platform-specific special input,
- // such as PC-relative addressing base.
- uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
- bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
-
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -4724,17 +4713,6 @@
GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo ||
GetMethodLoadKind() == MethodLoadKind::kBssEntry;
}
- bool HasCurrentMethodInput() const {
- // This function can be called only after the invoke has been fully initialized by the builder.
- if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
- DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
- return true;
- } else {
- DCHECK(InputCount() == GetSpecialInputIndex() ||
- !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
- return false;
- }
- }
QuickEntrypointEnum GetStringInitEntryPoint() const {
DCHECK(IsStringInit());
@@ -4761,6 +4739,60 @@
return target_method_;
}
+ // Does this method load kind need the current method as an input?
+ static bool NeedsCurrentMethodInput(DispatchInfo dispatch_info) {
+ return dispatch_info.method_load_kind == MethodLoadKind::kRecursive ||
+ dispatch_info.method_load_kind == MethodLoadKind::kRuntimeCall ||
+ dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative;
+ }
+
+ // Get the index of the current method input.
+ size_t GetCurrentMethodIndex() const {
+ DCHECK(HasCurrentMethodInput());
+ return GetCurrentMethodIndexUnchecked();
+ }
+ size_t GetCurrentMethodIndexUnchecked() const {
+ return GetNumberOfArguments();
+ }
+
+ // Check if the method has a current method input.
+ bool HasCurrentMethodInput() const {
+ if (NeedsCurrentMethodInput(GetDispatchInfo())) {
+ DCHECK(InputAt(GetCurrentMethodIndexUnchecked()) == nullptr || // During argument setup.
+ InputAt(GetCurrentMethodIndexUnchecked())->IsCurrentMethod());
+ return true;
+ } else {
+ DCHECK(InputCount() == GetCurrentMethodIndexUnchecked() ||
+ InputAt(GetCurrentMethodIndexUnchecked()) == nullptr || // During argument setup.
+ !InputAt(GetCurrentMethodIndexUnchecked())->IsCurrentMethod());
+ return false;
+ }
+ }
+
+ // Get the index of the special input.
+ size_t GetSpecialInputIndex() const {
+ DCHECK(HasSpecialInput());
+ return GetSpecialInputIndexUnchecked();
+ }
+ size_t GetSpecialInputIndexUnchecked() const {
+ return GetNumberOfArguments() + (HasCurrentMethodInput() ? 1u : 0u);
+ }
+
+ // Check if the method has a special input.
+ bool HasSpecialInput() const {
+ size_t other_inputs =
+ GetSpecialInputIndexUnchecked() + (IsStaticWithExplicitClinitCheck() ? 1u : 0u);
+ size_t input_count = InputCount();
+ DCHECK_LE(input_count - other_inputs, 1u) << other_inputs << " " << input_count;
+ return other_inputs != input_count;
+ }
+
+ void AddSpecialInput(HInstruction* input) {
+ // We allow only one special input.
+ DCHECK(!HasSpecialInput());
+ InsertInputAt(GetSpecialInputIndexUnchecked(), input);
+ }
+
// Remove the HClinitCheck or the replacement HLoadClass (set as last input by
// PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
// instruction; only relevant for static calls with explicit clinit check.
@@ -4788,11 +4820,6 @@
return IsStatic() && (GetClinitCheckRequirement() == ClinitCheckRequirement::kImplicit);
}
- // Does this method load kind need the current method as an input?
- static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
- return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall;
- }
-
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
protected:
@@ -4815,6 +4842,7 @@
DispatchInfo dispatch_info_;
};
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::CodePtrLocation rhs);
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
class HInvokeVirtual final : public HInvoke {
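
The index helpers added above imply a fixed input ordering for HInvokeStaticOrDirect. A small sketch restating that layout (hypothetical helper names, arithmetic only):

#include <cstddef>

// Layout implied by GetCurrentMethodIndexUnchecked()/GetSpecialInputIndexUnchecked():
//   [ arg_0 .. arg_{n-1} | HCurrentMethod? | special input? | explicit clinit check? ]
size_t CurrentMethodIndex(size_t num_args) {
  return num_args;
}
size_t SpecialInputIndex(size_t num_args, bool has_current_method) {
  return num_args + (has_current_method ? 1u : 0u);
}
size_t ClinitCheckIndex(size_t num_args, bool has_current_method, bool has_special_input) {
  // The explicit clinit check, when present, stays the last input.
  return SpecialInputIndex(num_args, has_current_method) + (has_special_input ? 1u : 0u);
}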
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 4ff293c..3ea1918 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -195,15 +195,6 @@
void HandleInvoke(HInvoke* invoke) {
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
- // We can't add the method address if we already have a current method pointer.
- // This may arise when sharpening doesn't remove the current method pointer from the invoke.
- if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) {
- // Note: This happens only for recursive calls (including compiling an intrinsic
- // by faking a call to itself; we use kRuntimeCall for this case).
- DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
- return;
- }
-
// If this is an invoke-static/-direct with PC-relative addressing (within boot image
// or using .bss or .data.bimg.rel.ro), we need the PC-relative address base.
bool base_added = false;
@@ -246,7 +237,6 @@
// This intrinsic needs the constant area.
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
- DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
invoke_static_or_direct->AddSpecialInput(method_address);
}
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1539421..04a8eab 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -124,6 +124,13 @@
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
+ if (method_load_kind != HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall &&
+ callee->IsCriticalNative()) {
+ DCHECK_NE(method_load_kind, HInvokeStaticOrDirect::MethodLoadKind::kRecursive);
+ DCHECK(callee->IsStatic());
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative;
+ }
+
if (codegen->GetGraph()->IsDebuggable()) {
// For debuggable apps always use the code pointer from ArtMethod
// so that we don't circumvent instrumentation stubs if installed.
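
The sharpening hunk above can be restated as a small decision function; this is an illustrative sketch with local enum mirrors, not the ART types, and it includes the debuggable override because that check runs afterwards and wins:

// Illustrative mirrors of the relevant enumerators; not the ART definitions.
enum class LoadKind { kRecursive, kBootImageLinkTimePcRelative, kRuntimeCall, kOther };
enum class CodePtr { kCallSelf, kCallCriticalNative, kCallArtMethod };

CodePtr ChooseCodePtrLocation(LoadKind load_kind,
                              bool is_critical_native,
                              bool is_debuggable,
                              CodePtr initial) {
  CodePtr result = initial;
  // Direct @CriticalNative dispatch needs the ArtMethod* resolved without a runtime call.
  if (load_kind != LoadKind::kRuntimeCall && is_critical_native) {
    result = CodePtr::kCallCriticalNative;
  }
  // Debuggable apps keep using the code pointer from the ArtMethod* so that
  // instrumentation stubs are not bypassed.
  if (is_debuggable) {
    result = CodePtr::kCallArtMethod;
  }
  return result;
}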
diff --git a/dex2oat/linker/image_writer.cc b/dex2oat/linker/image_writer.cc
index c8f36cc..31d5e99 100644
--- a/dex2oat/linker/image_writer.cc
+++ b/dex2oat/linker/image_writer.cc
@@ -3432,6 +3432,9 @@
CopyAndFixupPointer(copy, ArtMethod::DataOffset(target_ptr_size_), orig_table);
} else if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
quick_code = GetOatAddress(StubType::kQuickResolutionTrampoline);
+ // Set JNI entrypoint for resolving @CriticalNative methods called from compiled code.
+ const void* jni_code = GetOatAddress(StubType::kJNIDlsymLookupCriticalTrampoline);
+ copy->SetEntryPointFromJniPtrSize(jni_code, target_ptr_size_);
} else {
bool found_one = false;
for (size_t i = 0; i < static_cast<size_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 1f843b3..103f60f 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -2375,12 +2375,22 @@
} else if (method->IsAbstract() || method->IsClassInitializer()) {
// Don't print information for these.
} else if (method->IsRuntimeMethod()) {
- ImtConflictTable* table = method->GetImtConflictTable(image_header_.GetPointerSize());
- if (table != nullptr) {
- indent_os << "IMT conflict table " << table << " method: ";
- for (size_t i = 0, count = table->NumEntries(pointer_size); i < count; ++i) {
- indent_os << ArtMethod::PrettyMethod(table->GetImplementationMethod(i, pointer_size))
- << " ";
+ if (method == Runtime::Current()->GetResolutionMethod()) {
+ const void* resolution_trampoline =
+ method->GetEntryPointFromQuickCompiledCodePtrSize(image_header_.GetPointerSize());
+ indent_os << StringPrintf("Resolution trampoline: %p\n", resolution_trampoline);
+ const void* critical_native_resolution_trampoline =
+ method->GetEntryPointFromJniPtrSize(image_header_.GetPointerSize());
+ indent_os << StringPrintf("Resolution trampoline for @CriticalNative: %p\n",
+ critical_native_resolution_trampoline);
+ } else {
+ ImtConflictTable* table = method->GetImtConflictTable(image_header_.GetPointerSize());
+ if (table != nullptr) {
+ indent_os << "IMT conflict table " << table << " method: ";
+ for (size_t i = 0, count = table->NumEntries(pointer_size); i < count; ++i) {
+ indent_os << ArtMethod::PrettyMethod(table->GetImplementationMethod(i, pointer_size))
+ << " ";
+ }
}
}
} else {
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 5b51e51..7ffdf18 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -18,6 +18,7 @@
#define ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
#include "asm_support_arm.h"
+#include "interpreter/cfi_asm_support.h"
// Define special registers.
@@ -37,6 +38,16 @@
.arch armv7-a
.thumb
+.macro CFI_EXPRESSION_BREG n, b, offset
+ .if (-0x40 <= (\offset)) && ((\offset) < 0x40)
+ CFI_EXPRESSION_BREG_1(\n, \b, \offset)
+ .elseif (-0x2000 <= (\offset)) && ((\offset) < 0x2000)
+ CFI_EXPRESSION_BREG_2(\n, \b, \offset)
+ .else
+ .error "Unsupported offset"
+ .endif
+.endm
+
// Macro to generate the value of Runtime::Current into rDest. As it uses labels
// then the labels need to be unique. We bind these to the function name in the ENTRY macros.
.macro RUNTIME_CURRENT name, num, rDest
@@ -149,6 +160,16 @@
#endif // USE_HEAP_POISONING
.endm
+.macro INCREASE_FRAME frame_adjustment
+ sub sp, sp, #(\frame_adjustment)
+ .cfi_adjust_cfa_offset (\frame_adjustment)
+.endm
+
+.macro DECREASE_FRAME frame_adjustment
+ add sp, sp, #(\frame_adjustment)
+ .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
+
// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
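
The ±0x40 and ±0x2000 bounds in the CFI_EXPRESSION_BREG macro above are the one- and two-byte SLEB128 ranges for the DW_OP_breg offset operand. A stand-alone check of that cut-off (assumption: the _1/_2 variants in cfi_asm_support.h emit one- and two-byte offset encodings respectively):

// Length in bytes of the SLEB128 encoding of `value`, limited to the cases the macro handles.
constexpr int Sleb128Length(int value) {
  return (value >= -0x40 && value < 0x40)     ? 1
       : (value >= -0x2000 && value < 0x2000) ? 2
       : -1;  // Larger offsets would need more bytes; the macro raises an assembler error instead.
}
static_assert(Sleb128Length(0x3f) == 1, "fits in one byte");
static_assert(Sleb128Length(0x40) == 2, "needs a second byte");
static_assert(Sleb128Length(-0x2000) == 2, "two-byte lower bound");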
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index ceef772..3c506b0 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -24,10 +24,6 @@
ENTRY art_jni_dlsym_lookup_stub
push {r0, r1, r2, r3, lr} @ spill regs
.cfi_adjust_cfa_offset 20
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r1, 4
- .cfi_rel_offset r2, 8
- .cfi_rel_offset r3, 12
.cfi_rel_offset lr, 16
sub sp, #12 @ pad stack pointer to align frame
.cfi_adjust_cfa_offset 12
@@ -40,10 +36,10 @@
ldr ip, [ip] // ArtMethod* method
ldr ip, [ip, #ART_METHOD_ACCESS_FLAGS_OFFSET] // uint32_t access_flags
tst ip, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE)
- bne .Llookup_stub_fast_native
+ bne .Llookup_stub_fast_or_critical_native
blx artFindNativeMethod
b .Llookup_stub_continue
-.Llookup_stub_fast_native:
+.Llookup_stub_fast_or_critical_native:
blx artFindNativeMethodRunnable
.Llookup_stub_continue:
mov r12, r0 @ save result in r12
@@ -53,10 +49,6 @@
cbz r0, 1f @ is method code null?
pop {r0, r1, r2, r3, lr} @ restore regs
.cfi_adjust_cfa_offset -20
- .cfi_restore r0
- .cfi_restore r1
- .cfi_restore r2
- .cfi_restore r3
.cfi_restore lr
bx r12 @ if non-null, tail call to method's code
1:
@@ -69,29 +61,94 @@
tst r4, #1
bne art_jni_dlsym_lookup_stub
- // We need to create a GenericJNI managed frame above the stack args.
+ // Reserve space for a SaveRefsAndArgs managed frame, either for the actual runtime
+ // method or for a GenericJNI frame which is similar but has a native method and a tag.
+ // Do this eagerly, so that we can use these registers as temps without the need to
+ // save and restore them multiple times.
+ INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
- // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method
- // instead of runtime method saved at the bottom. Note that the runtime shall
- // not examine the args here, otherwise we would have to move them in registers
- // and stack to account for the difference between managed and native ABIs.
- SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
- // Save the hidden arg as method pointer, r0 in the padding.
- // (x0 is an arg in native ABI but not considered an arg in managed ABI.)
- strd r4, r0, [sp]
+ // Save args, the hidden arg and caller PC. No CFI needed for args and the hidden arg.
+ push {r0, r1, r2, r3, r4, lr}
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset lr, 20
- // Call artCriticalNativeOutArgsSize(method)
+ // Call artCriticalNativeFrameSize(method, caller_pc)
mov r0, r4 // r0 := method (from hidden arg)
- bl artCriticalNativeOutArgsSize
+ mov r1, lr // r1 := caller_pc
+ bl artCriticalNativeFrameSize
- // Check if we have any stack args.
- cbnz r0, .Lcritical_has_stack_args
+ // Prepare the return address for managed stack walk of the SaveRefsAndArgs frame.
+ // If we're coming from JNI stub with tail call, it is LR. If we're coming from
+ // JNI stub that saved the return address, it will be the last value we copy below.
+ // If we're coming directly from compiled code, it is LR, set further down.
+ ldr lr, [sp, #20]
- // Without stack args, the frame is fully constructed.
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- mov ip, sp
- orr ip, #1 // Tag as GenericJNI frame.
- str ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+ // Move the stack args if any.
+ add r4, sp, #24
+ cbz r0, .Lcritical_skip_copy_args
+.Lcritical_copy_args_loop:
+ ldrd ip, lr, [r4, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
+ subs r0, r0, #8
+ strd ip, lr, [r4], #8
+ bne .Lcritical_copy_args_loop
+.Lcritical_skip_copy_args:
+ // The managed frame address is now in R4. This is conveniently a callee-save in native ABI.
+
+ // Restore args.
+ pop {r0, r1, r2, r3}
+ .cfi_adjust_cfa_offset -16
+
+ // Spill registers for the SaveRefsAndArgs frame above the stack args.
+ // Note that the runtime shall not examine the args here, otherwise we would have to
+ // move them in registers and stack to account for the difference between managed and
+ // native ABIs.
+ add ip, r4, #FRAME_SIZE_SAVE_REFS_AND_ARGS - 40
+ stmia ip, {r1-r3, r5-r8, r10-r11, lr} // LR: Save return address for tail call from JNI stub.
+ // (If there were any stack args, we're storing the value that's already there.
+ // For direct calls from compiled managed code, we shall overwrite this below.)
+ // Skip args r1-r3.
+ CFI_EXPRESSION_BREG 5, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 28
+ CFI_EXPRESSION_BREG 6, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 24
+ CFI_EXPRESSION_BREG 7, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 20
+ CFI_EXPRESSION_BREG 8, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 16
+ CFI_EXPRESSION_BREG 10, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 12
+ CFI_EXPRESSION_BREG 11, 4, FRAME_SIZE_SAVE_REFS_AND_ARGS - 8
+ // The saved return PC for managed stack walk is not necessarily our LR.
+ // Skip managed FP args as these are native ABI caller-saves and not args.
+
+ // Restore the hidden arg to r1 and caller PC.
+ pop {r1, lr}
+ .cfi_adjust_cfa_offset -8
+ .cfi_restore lr
+
+ // Save our return PC in the padding.
+ str lr, [r4, #__SIZEOF_POINTER__]
+ CFI_EXPRESSION_BREG 14, 4, __SIZEOF_POINTER__
+
+ ldr ip, [r1, #ART_METHOD_ACCESS_FLAGS_OFFSET] // Load access flags.
+ add r2, r4, #1 // Prepare managed SP tagged for a GenericJNI frame.
+ tst ip, #ACCESS_FLAGS_METHOD_IS_NATIVE
+ bne .Lcritical_skip_prepare_runtime_method
+
+ // When coming from a compiled method, the return PC for managed stack walk is LR.
+ // (When coming from a compiled stub, the correct return PC is already stored above.)
+ str lr, [r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
+
+ // Replace the target method with the SaveRefsAndArgs runtime method.
+ RUNTIME_CURRENT1 r1
+ ldr r1, [r1, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
+
+ mov r2, r4 // Prepare untagged managed SP for the runtime method.
+
+.Lcritical_skip_prepare_runtime_method:
+ // Store the method on the bottom of the managed frame.
+ str r1, [r4]
+
+ // Place (maybe tagged) managed SP in Thread::Current()->top_quick_frame.
+ str r2, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+
+ // Preserve the native arg register r0 in callee-save register r10 which was saved above.
+ mov r10, r0
// Call artFindNativeMethodRunnable()
mov r0, rSELF // pass Thread::Current()
@@ -100,150 +157,88 @@
// Store result in scratch reg.
mov ip, r0
- // Restore frame.
- .cfi_remember_state
- ldrd r4, r0, [sp]
- RESTORE_SAVE_REFS_AND_ARGS_FRAME
- REFRESH_MARKING_REGISTER
+ // Restore the native arg register r0.
+ mov r0, r10
- // Check for exception.
- cmp ip, #0
- beq .Lcritical_deliver_exception
-
- // Do the tail call.
- bx ip
- .cfi_restore_state
- .cfi_def_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS
-
-.Lcritical_has_stack_args:
- // Move the out args size to a scratch register.
- mov ip, r0
-
- // Restore register args as we're about to move stack args.
- ldrd r4, r0, [sp]
- RESTORE_SAVE_REFS_AND_ARGS_FRAME
-
- // Reserve space for SaveRefsAndArgs frame.
- sub sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
- .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS
-
- // Save arg regs so that we can use them as temporaries.
- push {r0-r3}
- .cfi_adjust_cfa_offset 16
-
- // Move out args. For simplicity include the return address at the end.
- add r0, sp, #16 // Destination.
- add ip, r0, ip // Destination end.
-1:
- ldrd r2, r3, [r0, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
- strd r2, r3, [r0], #8
- cmp r0, ip
- bne 1b
-
- // Save our LR, load caller's LR and redefine CFI to take ownership of the JNI stub frame.
- str lr, [ip, #-__SIZEOF_POINTER__]
- mov lr, r3 // The last moved value from the loop above.
- .cfi_def_cfa ip, FRAME_SIZE_SAVE_REFS_AND_ARGS
-
- // Restore arg regs.
- pop {r0-r3} // No `.cfi_adjust_cfa_offset`, CFA register is currently ip, not sp.
-
- // Re-create the SaveRefsAndArgs frame above the args.
- strd r4, r0, [ip] // r0 in the padding as before.
- add r4, ip, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40
- stmia r4, {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
- .cfi_rel_offset r1, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 0
- .cfi_rel_offset r2, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 4
- .cfi_rel_offset r3, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 8
- .cfi_rel_offset r5, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 12
- .cfi_rel_offset r6, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 16
- .cfi_rel_offset r7, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 20
- .cfi_rel_offset r8, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 24
- .cfi_rel_offset r10, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 28
- .cfi_rel_offset r11, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 32
- .cfi_rel_offset lr, FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 + 36
- vstmdb r4!, {s0-s15} @ 16 words of float args.
-
- // Move the frame register to a callee-save register.
- mov r11, ip
- .cfi_def_cfa_register r11
-
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- orr ip, r11, #1 // Tag as GenericJNI frame.
- str ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
-
- // Call artFindNativeMethodRunnable()
- mov r0, rSELF // pass Thread::Current()
- bl artFindNativeMethodRunnable
-
- // Store result in scratch reg.
- mov ip, r0
-
- // Restore the frame. We shall not need the method anymore, so use r4 as scratch register.
- mov r4, r11
- .cfi_def_cfa_register r4
- ldr r0, [r4, #4]
- add r11, r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - 40 - 64)
- vldmia r11!, {s0-s15} @ 16 words of float args.
- ldmia r11, {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
- .cfi_restore r1
- .cfi_restore r2
- .cfi_restore r3
+ // Restore the frame. We shall not need the method anymore.
+ add r1, r4, #FRAME_SIZE_SAVE_REFS_AND_ARGS - 40
+ ldmia r1, {r1-r3, r5-r8, r10-r11}
.cfi_restore r5
.cfi_restore r6
.cfi_restore r7
.cfi_restore r8
.cfi_restore r10
.cfi_restore r11
- .cfi_restore lr
+
REFRESH_MARKING_REGISTER
- // Check for exception.
+ // Check for exception before moving args back to keep the return PC for managed stack walk.
cmp ip, #0
- beq 3f
+ beq .Lcritical_deliver_exception
- // Save arg regs so that we can use them as temporaries.
- push {r0-r3} // No `.cfi_adjust_cfa_offset`, CFA register is currently r4, not sp.
+ .cfi_remember_state
+
+ // Restore our return PC.
+ ldr lr, [r4, #__SIZEOF_POINTER__]
+ .cfi_restore lr
// Move stack args to their original place.
- mov r0, r4
- add r1, sp, #16
-2:
- ldrd r2, r3, [r0, #-8]!
- strd r2, r3, [r0, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
- cmp r1, r0
- bne 2b
-
- // Replace original return address with caller's return address.
- ldr r1, [r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
- str lr, [r4, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
-
- // Restore LR and redefine CFI to release ownership of the JNI stub frame.
- .cfi_remember_state
- mov lr, r1
- .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS + 16
-
- // Restore args
- pop {r0-r3}
+ cmp sp, r4
+ beq .Lcritical_skip_copy_args_back
+ push {r0, r1, r2, r3}
+ .cfi_adjust_cfa_offset 16
+ add r0, sp, #16
+ sub r0, r4, r0
+.Lcritical_copy_args_loop_back:
+ ldrd r2, r3, [r4, #-8]!
+ subs r0, r0, #8
+ strd r2, r3, [r4, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
+ bne .Lcritical_copy_args_loop_back
+ pop {r0, r1, r2, r3}
.cfi_adjust_cfa_offset -16
+.Lcritical_skip_copy_args_back:
// Remove the frame reservation.
- add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
- .cfi_adjust_cfa_offset -FRAME_SIZE_SAVE_REFS_AND_ARGS
+ DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
// Do the tail call.
bx ip
.cfi_restore_state
- .cfi_def_cfa x4, FRAME_SIZE_SAVE_REFS_AND_ARGS
-
-3:
- // Drop stack args and the SaveRefsAndArgs reservation.
- mov sp, r4
- add sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
- .cfi_def_cfa sp, 0
+ .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
.Lcritical_deliver_exception:
- // When delivering exception, we check that rSELF was saved but the SaveRefsAndArgs frame does
- // not save it, so we cannot use DELIVER_PENDING_EXCEPTION_FRAME_READY with the above frames.
- DELIVER_PENDING_EXCEPTION
+ // The exception delivery checks that rSELF was saved but the SaveRefsAndArgs
+ // frame does not save it, so we cannot use the existing SaveRefsAndArgs frame.
+ // That's why we checked for exception after restoring registers from it.
+ // We need to build a SaveAllCalleeSaves frame instead. Args are irrelevant at this
+ // point but keep the area allocated for stack args to keep CFA definition simple.
+#if FRAME_SIZE_SAVE_REFS_AND_ARGS != FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+# error "Expected FRAME_SIZE_SAVE_REFS_AND_ARGS == FRAME_SIZE_SAVE_ALL_CALLEE_SAVES"
+ // Otherwise we would need to adjust SP and R4 and move our return PC which is at [R4, #4].
+ // (Luckily, both SaveRefsAndArgs and SaveAllCalleeSaves frames have padding there.)
+#endif
+
+ // Spill registers for the SaveAllCalleeSaves frame above the stack args area.
+ add ip, r4, #FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 32
+ stmia ip, {r5-r11} // Keep the caller PC for managed stack walk.
+ CFI_EXPRESSION_BREG 5, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 32
+ CFI_EXPRESSION_BREG 6, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 28
+ CFI_EXPRESSION_BREG 7, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 24
+ CFI_EXPRESSION_BREG 8, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 20
+ CFI_EXPRESSION_BREG 9, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 16
+ CFI_EXPRESSION_BREG 10, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 12
+ CFI_EXPRESSION_BREG 11, 4, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES - 8
+ // Skip R4, it is callee-save in managed ABI.
+ add ip, r4, #12
+ vstmia ip, {s16-s31}
+
+ // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves] to the managed frame.
+ RUNTIME_CURRENT2 ip
+ ldr ip, [ip, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
+ str ip, [r4]
+
+ // Place the managed frame SP in Thread::Current()->top_quick_frame.
+ str r4, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END art_jni_dlsym_lookup_critical_stub
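
Quick cross-check of the scratch save area at the top of the ARM critical stub above: push {r0, r1, r2, r3, r4, lr} stores six 4-byte slots with LR on top, which is why the code annotates ".cfi_rel_offset lr, 20" and later reloads it with "ldr lr, [sp, #20]". Stand-alone arithmetic, not ART code:

#include <cstddef>

// On AArch32, push stores the lowest-numbered register at the lowest address,
// so LR ends up in the highest slot of the 24-byte block.
constexpr size_t kArmWordSize = 4u;
constexpr size_t kScratchSaveSlots = 6u;  // r0-r3, hidden arg r4, lr
constexpr size_t kScratchSaveBytes = kScratchSaveSlots * kArmWordSize;
constexpr size_t kScratchLrOffset = kScratchSaveBytes - kArmWordSize;
static_assert(kScratchSaveBytes == 24u && kScratchLrOffset == 20u, "matches the CFI annotation");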
diff --git a/runtime/arch/arm/jni_frame_arm.h b/runtime/arch/arm/jni_frame_arm.h
index 5203eaf..2263873 100644
--- a/runtime/arch/arm/jni_frame_arm.h
+++ b/runtime/arch/arm/jni_frame_arm.h
@@ -38,9 +38,8 @@
// Note: AAPCS is soft-float, so these are all core registers.
constexpr size_t kJniArgumentRegisterCount = 4u;
-// Get the size of "out args" for @CriticalNative method stub.
-// This must match the size of the frame emitted by the JNI compiler at the native call site.
-inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) {
+// Get stack args size for @CriticalNative method calls.
+inline size_t GetCriticalNativeCallArgsSize(const char* shorty, uint32_t shorty_len) {
DCHECK_EQ(shorty_len, strlen(shorty));
size_t reg = 0; // Register for the current argument; if reg >= 4, we shall use stack.
@@ -54,7 +53,14 @@
reg += 1u;
}
size_t stack_args = std::max(reg, kJniArgumentRegisterCount) - kJniArgumentRegisterCount;
- size_t size = kFramePointerSize * stack_args;
+ return kFramePointerSize * stack_args;
+}
+
+// Get the frame size for @CriticalNative method stub.
+// This must match the size of the frame emitted by the JNI compiler at the native call site.
+inline size_t GetCriticalNativeStubFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
// Check if this is a tail call, i.e. there are no stack args and the return type
// is not an FP type (otherwise we need to move the result to FP register).
@@ -65,6 +71,16 @@
return RoundUp(size, kAapcsStackAlignment);
}
+// Get the frame size for direct call to a @CriticalNative method.
+// This must match the size of the extra frame emitted by the compiler at the native call site.
+inline size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
+
+ // No return PC to save, zero- and sign-extension and FP value moves are handled by the caller.
+ return RoundUp(size, kAapcsStackAlignment);
+}
+
} // namespace arm
} // namespace art
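
The two new frame-size helpers above differ only in the return-PC slot: the JNI stub may have to spill LR next to the stack args (unless it can tail-call), while a direct call from compiled managed code keeps its return PC in the caller's frame. A minimal sketch of that relationship, assuming kFramePointerSize == 4 and kAapcsStackAlignment == 8 as on ARM (illustrative, not the header's code):

#include <cstddef>

constexpr size_t RoundUpTo(size_t value, size_t alignment) {
  return (value + alignment - 1u) & ~(alignment - 1u);
}

// Direct call: just the outgoing stack args, aligned.
constexpr size_t DirectCallFrameSize(size_t call_args_size) {
  return RoundUpTo(call_args_size, 8u);
}

// JNI stub: also reserves 4 bytes for the return PC unless it can tail-call
// (no stack args and no FP result that must be moved back to an FP register).
constexpr size_t StubFrameSize(size_t call_args_size, bool can_tail_call) {
  return RoundUpTo(call_args_size + (can_tail_call ? 0u : 4u), 8u);
}

static_assert(DirectCallFrameSize(0u) == 0u && StubFrameSize(0u, /*can_tail_call=*/ true) == 0u);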
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index fd5c852..f7fa7df 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -18,6 +18,7 @@
#define ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_S_
#include "asm_support_arm64.h"
+#include "interpreter/cfi_asm_support.h"
// Define special registers.
@@ -40,6 +41,16 @@
#define wMR w20
#endif
+.macro CFI_EXPRESSION_BREG n, b, offset
+ .if (-0x40 <= (\offset)) && ((\offset) < 0x40)
+ CFI_EXPRESSION_BREG_1(\n, \b, \offset)
+ .elseif (-0x2000 <= (\offset)) && ((\offset) < 0x2000)
+ CFI_EXPRESSION_BREG_2(\n, \b, \offset)
+ .else
+ .error "Unsupported offset"
+ .endif
+.endm
+
.macro ENTRY_ALIGNED name, alignment
.type \name, #function
.hidden \name // Hide this as a global symbol, so we do not incur plt calls.
@@ -98,11 +109,15 @@
.cfi_rel_offset \reg, (\offset)
.endm
-.macro RESTORE_REG reg, offset
- ldr \reg, [sp, #(\offset)]
+.macro RESTORE_REG_BASE base, reg, offset
+ ldr \reg, [\base, #(\offset)]
.cfi_restore \reg
.endm
+.macro RESTORE_REG reg, offset
+ RESTORE_REG_BASE sp, \reg, \offset
+.endm
+
.macro SAVE_TWO_REGS_BASE base, reg1, reg2, offset
stp \reg1, \reg2, [\base, #(\offset)]
.cfi_rel_offset \reg1, (\offset)
@@ -125,11 +140,11 @@
.macro LOAD_RUNTIME_INSTANCE reg
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
- adrp xIP0, :pg_hi21_nc:_ZN3art7Runtime9instance_E
+ adrp \reg, :pg_hi21_nc:_ZN3art7Runtime9instance_E
#else
- adrp xIP0, _ZN3art7Runtime9instance_E
+ adrp \reg, _ZN3art7Runtime9instance_E
#endif
- ldr xIP0, [xIP0, #:lo12:_ZN3art7Runtime9instance_E]
+ ldr \reg, [\reg, #:lo12:_ZN3art7Runtime9instance_E]
.endm
// Macro to refresh the Marking Register (W20).
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index 8a34662..f72bc55 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -55,10 +55,10 @@
ldr xIP0, [xIP0, #ART_METHOD_ACCESS_FLAGS_OFFSET] // uint32_t access_flags
mov xIP1, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE)
tst xIP0, xIP1
- b.ne .Llookup_stub_fast_native
+ b.ne .Llookup_stub_fast_or_critical_native
bl artFindNativeMethod
b .Llookup_stub_continue
- .Llookup_stub_fast_native:
+ .Llookup_stub_fast_or_critical_native:
bl artFindNativeMethodRunnable
.Llookup_stub_continue:
mov x17, x0 // store result in scratch reg.
@@ -97,136 +97,235 @@
// For Generic JNI we already have a managed frame, so we reuse the art_jni_dlsym_lookup_stub.
tbnz x15, #0, art_jni_dlsym_lookup_stub
- // We need to create a GenericJNI managed frame above the stack args.
+ // Save args, the hidden arg and caller PC. No CFI needed for args and the hidden arg.
+ stp x0, x1, [sp, #-(8 * 8 + 8 * 8 + 2 * 8)]!
+ .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + 2 * 8)
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
+ stp d0, d1, [sp, #64]
+ stp d2, d3, [sp, #80]
+ stp d4, d5, [sp, #96]
+ stp d6, d7, [sp, #112]
+ stp x15, lr, [sp, #128]
+ .cfi_rel_offset lr, 136
- // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method
- // instead of runtime method saved at the bottom. Note that the runtime shall
- // not examine the args here, otherwise we would have to move them in registers
- // and stack to account for the difference between managed and native ABIs.
- INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
- SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
- // Save the hidden arg as method pointer, x0 in the padding.
- // (x0 is an arg in native ABI but not considered an arg in managed ABI.)
- SAVE_TWO_REGS x15, x0, 0
-
- // Call artCriticalNativeOutArgsSize(method)
+ // Call artCriticalNativeFrameSize(method, caller_pc)
mov x0, x15 // x0 := method (from hidden arg)
- bl artCriticalNativeOutArgsSize
+ mov x1, lr // x1 := caller_pc
+ bl artCriticalNativeFrameSize
- // Check if we have any stack args.
- cbnz x0, .Lcritical_has_stack_args
+ // Move frame size to x14.
+ mov x14, x0
- // Without stack args, the frame is fully constructed.
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- mov xIP0, sp
- orr xIP0, xIP0, #1 // Tag as GenericJNI frame.
- str xIP0, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+ // Restore args, the hidden arg and caller PC.
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ ldp d0, d1, [sp, #64]
+ ldp d2, d3, [sp, #80]
+ ldp d4, d5, [sp, #96]
+ ldp d6, d7, [sp, #112]
+ ldp x15, lr, [sp, #128]
+ .cfi_restore lr
+ ldp x0, x1, [sp], #(8 * 8 + 8 * 8 + 2 * 8)
+ .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + 2 * 8)
+
+ // Reserve space for a SaveRefsAndArgs managed frame, either for the actual runtime
+ // method or for a GenericJNI frame which is similar but has a native method and a tag.
+ INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
+
+ // Calculate the base address of the managed frame.
+ add x13, sp, x14
+
+ // Prepare the return address for managed stack walk of the SaveRefsAndArgs frame.
+ // If we're coming from JNI stub with tail call, it is LR. If we're coming from
+ // JNI stub that saved the return address, it will be the last value we copy below.
+ // If we're coming directly from compiled code, it is LR, set further down.
+ mov xIP1, lr
+
+ // Move the stack args if any.
+ cbz x14, .Lcritical_skip_copy_args
+ mov x12, sp
+.Lcritical_copy_args_loop:
+ ldp xIP0, xIP1, [x12, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
+ subs x14, x14, #16
+ stp xIP0, xIP1, [x12], #16
+ bne .Lcritical_copy_args_loop
+.Lcritical_skip_copy_args:
+
+ // Spill registers for the SaveRefsAndArgs frame above the stack args.
+ // Note that the runtime shall not examine the args here, otherwise we would have to
+ // move them in registers and stack to account for the difference between managed and
+ // native ABIs. Do not update CFI while we hold the frame address in x13 and the values
+ // in registers are unchanged.
+ stp d0, d1, [x13, #16]
+ stp d2, d3, [x13, #32]
+ stp d4, d5, [x13, #48]
+ stp d6, d7, [x13, #64]
+ stp x1, x2, [x13, #80]
+ stp x3, x4, [x13, #96]
+ stp x5, x6, [x13, #112]
+ stp x7, x20, [x13, #128]
+ stp x21, x22, [x13, #144]
+ stp x23, x24, [x13, #160]
+ stp x25, x26, [x13, #176]
+ stp x27, x28, [x13, #192]
+ stp x29, xIP1, [x13, #208] // xIP1: Save return address for tail call from JNI stub.
+ // (If there were any stack args, we're storing the value that's already there.
+ // For direct calls from compiled managed code, we shall overwrite this below.)
+
+ // Move the managed frame address to native callee-save register x29 and update CFI.
+ mov x29, x13
+ // Skip args d0-d7, x1-x7
+ CFI_EXPRESSION_BREG 20, 29, 136
+ CFI_EXPRESSION_BREG 21, 29, 144
+ CFI_EXPRESSION_BREG 22, 29, 152
+ CFI_EXPRESSION_BREG 23, 29, 160
+ CFI_EXPRESSION_BREG 24, 29, 168
+ CFI_EXPRESSION_BREG 25, 29, 176
+ CFI_EXPRESSION_BREG 26, 29, 184
+ CFI_EXPRESSION_BREG 27, 29, 192
+ CFI_EXPRESSION_BREG 28, 29, 200
+ CFI_EXPRESSION_BREG 29, 29, 208
+ // The saved return PC for managed stack walk is not necessarily our LR.
+
+ // Save our return PC in the padding.
+ str lr, [x29, #__SIZEOF_POINTER__]
+ CFI_EXPRESSION_BREG 30, 29, __SIZEOF_POINTER__
+
+ ldr wIP0, [x15, #ART_METHOD_ACCESS_FLAGS_OFFSET] // Load access flags.
+ add x14, x29, #1 // Prepare managed SP tagged for a GenericJNI frame.
+ tbnz wIP0, #ACCESS_FLAGS_METHOD_IS_NATIVE_BIT, .Lcritical_skip_prepare_runtime_method
+
+ // When coming from a compiled method, the return PC for managed stack walk is LR.
+ // (When coming from a compiled stub, the correct return PC is already stored above.)
+ str lr, [x29, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
+
+ // Replace the target method with the SaveRefsAndArgs runtime method.
+ LOAD_RUNTIME_INSTANCE x15
+ ldr x15, [x15, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
+
+ mov x14, x29 // Prepare untagged managed SP for the runtime method.
+
+.Lcritical_skip_prepare_runtime_method:
+ // Store the method on the bottom of the managed frame.
+ str x15, [x29]
+
+ // Place (maybe tagged) managed SP in Thread::Current()->top_quick_frame.
+ str x14, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+
+ // Preserve the native arg register x0 in callee-save register x28 which was saved above.
+ mov x28, x0
// Call artFindNativeMethodRunnable()
mov x0, xSELF // pass Thread::Current()
bl artFindNativeMethodRunnable
// Store result in scratch reg.
- mov xIP0, x0
+ mov x13, x0
- // Restore frame.
+ // Restore the native arg register x0.
+ mov x0, x28
+
+ // Restore our return PC.
+ RESTORE_REG_BASE x29, lr, __SIZEOF_POINTER__
+
+ // Remember the stack args size, negated because SP cannot be on the right-hand side in SUB.
+ sub x14, sp, x29
+
+ // Restore the frame. We shall not need the method anymore.
+ ldp d0, d1, [x29, #16]
+ ldp d2, d3, [x29, #32]
+ ldp d4, d5, [x29, #48]
+ ldp d6, d7, [x29, #64]
+ ldp x1, x2, [x29, #80]
+ ldp x3, x4, [x29, #96]
+ ldp x5, x6, [x29, #112]
+ ldp x7, x20, [x29, #128]
+ .cfi_restore x20
+ RESTORE_TWO_REGS_BASE x29, x21, x22, 144
+ RESTORE_TWO_REGS_BASE x29, x23, x24, 160
+ RESTORE_TWO_REGS_BASE x29, x25, x26, 176
+ RESTORE_TWO_REGS_BASE x29, x27, x28, 192
+ RESTORE_REG_BASE x29, x29, 208
+
+ REFRESH_MARKING_REGISTER
+
+ // Check for exception before moving args back to keep the return PC for managed stack walk.
+ cbz x13, .Lcritical_deliver_exception
+
.cfi_remember_state
- RESTORE_TWO_REGS x15, x0, 0
- RESTORE_SAVE_REFS_AND_ARGS_FRAME
- REFRESH_MARKING_REGISTER
-
- // Check for exception.
- cbz xIP0, .Lcritical_deliver_exception
-
- // Do the tail call
- br xIP0
- .cfi_restore_state
- .cfi_def_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS
-
-.Lcritical_has_stack_args:
- // Move the out args size to a scratch register.
- mov xIP0, x0
-
- // Restore register args as we're about to move stack args.
- RESTORE_TWO_REGS x15, x0, 0
- RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
-
- // Move out args. For simplicity include the return address at the end.
- mov x8, sp // Destination.
- add x9, sp, xIP0 // Destination end.
-1:
- ldp x10, x11, [x8, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
- stp x10, x11, [x8], #16
- cmp x8, x9
- bne 1b
-
- // Save our LR, load caller's LR and redefine CFI to take ownership of the JNI stub frame.
- str xLR, [x9, #-__SIZEOF_POINTER__]
- mov xLR, x11 // The last moved value from the loop above.
- .cfi_def_cfa x9, FRAME_SIZE_SAVE_REFS_AND_ARGS
-
- // Re-create the SaveRefsAndArgs frame above the args.
- SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL x9
- SAVE_TWO_REGS_BASE x9, x15, x0, 0
-
- // Move the frame register to a callee-save register.
- mov x29, x9
- .cfi_def_cfa_register x29
-
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- orr xIP0, x29, #1 // Tag as GenericJNI frame.
- str xIP0, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
-
- // Call artFindNativeMethodRunnable()
- mov x0, xSELF // pass Thread::Current()
- bl artFindNativeMethodRunnable
-
- // Store result in scratch reg.
- mov xIP0, x0
-
- // Restore the frame.
- mov x9, x29
- .cfi_def_cfa_register x9
- RESTORE_TWO_REGS_BASE x9, x15, x0, 0
- RESTORE_SAVE_REFS_AND_ARGS_FRAME_INTERNAL x9
- REFRESH_MARKING_REGISTER
-
- // Check for exception.
- cbz xIP0, 3f
// Move stack args to their original place.
- mov x8, x9
-2:
- ldp x10, x11, [x8, #-16]!
- stp x10, x11, [x8, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
- cmp sp, x8
- bne 2b
-
- // Replace original return address with caller's return address.
- ldr xIP1, [x9, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
- str xLR, [x9, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)]
-
- // Restore LR and redefine CFI to release ownership of the JNI stub frame.
- .cfi_remember_state
- mov xLR, xIP1
- .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
+ cbz x14, .Lcritical_skip_copy_args_back
+ sub x12, sp, x14
+.Lcritical_copy_args_back_loop:
+ ldp xIP0, xIP1, [x12, #-16]!
+ adds x14, x14, #16
+ stp xIP0, xIP1, [x12, #FRAME_SIZE_SAVE_REFS_AND_ARGS]
+ bne .Lcritical_copy_args_back_loop
+.Lcritical_skip_copy_args_back:
// Remove the frame reservation.
DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
// Do the tail call.
- br xIP0
+ br x13
.cfi_restore_state
- .cfi_def_cfa x9, FRAME_SIZE_SAVE_REFS_AND_ARGS
-
-3:
- // Drop stack args and the SaveRefsAndArgs reservation.
- mov sp, x9
- add sp, sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS
- .cfi_def_cfa sp, 0
+ .cfi_def_cfa sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
.Lcritical_deliver_exception:
- // When delivering exception, we check that xSELF was saved but the SaveRefsAndArgs frame does
- // not save it, so we cannot use DELIVER_PENDING_EXCEPTION_FRAME_READY with the above frames.
- DELIVER_PENDING_EXCEPTION
+ // The exception delivery checks that xSELF was saved but the SaveRefsAndArgs
+ // frame does not save it, so we cannot use the existing SaveRefsAndArgs frame.
+ // That's why we checked for exception after restoring registers from it.
+ // We need to build a SaveAllCalleeSaves frame instead. Args are irrelevant at this
+ // point but keep the area allocated for stack args to keep CFA definition simple.
+ DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+
+ // Calculate the base address of the managed frame.
+ sub x13, sp, x14
+
+ // Spill registers for the SaveAllCalleeSaves frame above the stack args area. Do not update
+ // CFI while we hold the frame address in x13 and the values in registers are unchanged.
+ stp d8, d9, [x13, #16]
+ stp d10, d11, [x13, #32]
+ stp d12, d13, [x13, #48]
+ stp d14, d15, [x13, #64]
+ stp x19, x20, [x13, #80]
+ stp x21, x22, [x13, #96]
+ stp x23, x24, [x13, #112]
+ stp x25, x26, [x13, #128]
+ stp x27, x28, [x13, #144]
+ str x29, [x13, #160]
+ // Keep the caller PC for managed stack walk.
+
+ // Move the managed frame address to native callee-save register x29 and update CFI.
+ mov x29, x13
+ CFI_EXPRESSION_BREG 19, 29, 80
+ CFI_EXPRESSION_BREG 20, 29, 88
+ CFI_EXPRESSION_BREG 21, 29, 96
+ CFI_EXPRESSION_BREG 22, 29, 104
+ CFI_EXPRESSION_BREG 23, 29, 112
+ CFI_EXPRESSION_BREG 24, 29, 120
+ CFI_EXPRESSION_BREG 25, 29, 128
+ CFI_EXPRESSION_BREG 26, 29, 136
+ CFI_EXPRESSION_BREG 27, 29, 144
+ CFI_EXPRESSION_BREG 28, 29, 152
+ CFI_EXPRESSION_BREG 29, 29, 160
+ // The saved return PC for managed stack walk is not necessarily our LR.
+
+ // Save our return PC in the padding.
+ str lr, [x29, #__SIZEOF_POINTER__]
+ CFI_EXPRESSION_BREG 30, 29, __SIZEOF_POINTER__
+
+ // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves] to the managed frame.
+ LOAD_RUNTIME_INSTANCE xIP0
+ ldr xIP0, [xIP0, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
+ str xIP0, [x29]
+
+ // Place the managed frame SP in Thread::Current()->top_quick_frame.
+ str x29, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
+
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END art_jni_dlsym_lookup_critical_stub
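
Cross-check of the scratch save area at the top of the arm64 critical stub above: the pre-indexed stp reserves slots for the eight GPR args (x0-x7), the eight FP args (d0-d7), the hidden ArtMethod* (x15) and LR, all 8 bytes each. Stand-alone arithmetic, not ART code:

#include <cstddef>

constexpr size_t kArm64WordSize = 8u;
constexpr size_t kGprArgRegs = 8u;   // x0-x7
constexpr size_t kFprArgRegs = 8u;   // d0-d7
constexpr size_t kExtraSlots = 2u;   // hidden arg x15 and lr
constexpr size_t kScratchSaveBytes = (kGprArgRegs + kFprArgRegs + kExtraSlots) * kArm64WordSize;
static_assert(kScratchSaveBytes == 144u, "matches the pre-index in 'stp x0, x1, [sp, #-(...)]!'");
static_assert(kScratchSaveBytes - kArm64WordSize == 136u, "LR slot matches '.cfi_rel_offset lr, 136'");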
diff --git a/runtime/arch/arm64/jni_frame_arm64.h b/runtime/arch/arm64/jni_frame_arm64.h
index fa4d43c..17e7434 100644
--- a/runtime/arch/arm64/jni_frame_arm64.h
+++ b/runtime/arch/arm64/jni_frame_arm64.h
@@ -41,28 +41,34 @@
// in registers. The rest of the args must go on the stack.
constexpr size_t kMaxIntLikeRegisterArguments = 8u;
-// Get the size of "out args" for @CriticalNative method stub.
-// This must match the size of the frame emitted by the JNI compiler at the native call site.
-inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) {
- DCHECK_EQ(shorty_len, strlen(shorty));
-
- size_t num_fp_args = 0u;
- for (size_t i = 1; i != shorty_len; ++i) {
- if (shorty[i] == 'F' || shorty[i] == 'D') {
- num_fp_args += 1u;
- }
- }
- size_t num_non_fp_args = shorty_len - 1u - num_fp_args;
-
+// Get the size of the outgoing stack arguments for a native call.
+inline size_t GetNativeOutArgsSize(size_t num_fp_args, size_t num_non_fp_args) {
// Account for FP arguments passed through v0-v7.
size_t num_stack_fp_args =
num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
// Account for other (integer and pointer) arguments passed through GPR (x0-x7).
size_t num_stack_non_fp_args =
num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
+ // Each stack argument takes 8 bytes.
+ return (num_stack_fp_args + num_stack_non_fp_args) * static_cast<size_t>(kArm64PointerSize);
+}
+
+// Get stack args size for @CriticalNative method calls.
+inline size_t GetCriticalNativeCallArgsSize(const char* shorty, uint32_t shorty_len) {
+ DCHECK_EQ(shorty_len, strlen(shorty));
+
+ size_t num_fp_args =
+ std::count_if(shorty + 1, shorty + shorty_len, [](char c) { return c == 'F' || c == 'D'; });
+ size_t num_non_fp_args = shorty_len - 1u - num_fp_args;
+
+ return GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
+}
+
+// Get the frame size for @CriticalNative method stub.
+// This must match the size of the frame emitted by the JNI compiler at the native call site.
+inline size_t GetCriticalNativeStubFrameSize(const char* shorty, uint32_t shorty_len) {
// The size of outgoing arguments.
- size_t size =
- (num_stack_fp_args + num_stack_non_fp_args) * static_cast<size_t>(kArm64PointerSize);
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
// We can make a tail call if there are no stack args and we do not need
// to extend the result. Otherwise, add space for return PC.
@@ -72,6 +78,16 @@
return RoundUp(size, kAapcs64StackAlignment);
}
+// Get the frame size for direct call to a @CriticalNative method.
+// This must match the size of the extra frame emitted by the compiler at the native call site.
+inline size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
+
+ // No return PC to save, zero- and sign-extension are handled by the caller.
+ return RoundUp(size, kAapcs64StackAlignment);
+}
+
} // namespace arm64
} // namespace art
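
Worked example for GetNativeOutArgsSize() above (assumes an ART source tree so the header is on the include path; otherwise read it as hand arithmetic): 10 FP args fill v0-v7 and spill 2, 9 integer-like args fill x0-x7 and spill 1, and each spilled arg takes an 8-byte stack slot.

#include <cassert>

#include "arch/arm64/jni_frame_arm64.h"  // Assumes ART include paths; illustrative only.

void CheckArm64NativeOutArgsSize() {
  // (10 - 8) FP spills + (9 - 8) GPR spill = 3 slots * 8 bytes = 24 bytes.
  assert(art::arm64::GetNativeOutArgsSize(/*num_fp_args=*/ 10u, /*num_non_fp_args=*/ 9u) == 24u);
}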
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 8938d8b..1a75cbc 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -18,6 +18,7 @@
#define ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
#include "asm_support_x86.h"
+#include "interpreter/cfi_asm_support.h"
// Regular gas(1) & current clang/llvm assembler support named macro parameters.
#define MACRO0(macro_name) .macro macro_name
@@ -77,6 +78,7 @@
#define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
#define CFI_RESTORE(reg) .cfi_restore reg
#define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+ #define CFI_REGISTER(orig_reg, current_reg) .cfi_register orig_reg, current_reg
#define CFI_REMEMBER_STATE .cfi_remember_state
// The spec is not clear whether the CFA is part of the saved state and tools
// differ in the behaviour, so explicitly set the CFA to avoid any ambiguity.
@@ -93,11 +95,34 @@
#define CFI_DEF_CFA_REGISTER(reg)
#define CFI_RESTORE(reg)
#define CFI_REL_OFFSET(reg,size)
+ #define CFI_REGISTER(orig_reg, current_reg)
#define CFI_REMEMBER_STATE
#define CFI_RESTORE_STATE_AND_DEF_CFA(reg,off)
#define CFI_ESCAPE(...)
#endif
+#define CFI_REG_eax 0
+#define CFI_REG_ecx 1
+#define CFI_REG_edx 2
+#define CFI_REG_ebx 3
+#define CFI_REG_esp 4
+#define CFI_REG_ebp 5
+#define CFI_REG_esi 6
+#define CFI_REG_edi 7
+#define CFI_REG_eip 8
+
+#define CFI_REG(reg) CFI_REG_##reg
+
+MACRO3(CFI_EXPRESSION_BREG, n, b, offset)
+ .if (-0x40 <= (\offset)) && ((\offset) < 0x40)
+ CFI_EXPRESSION_BREG_1(\n, \b, \offset)
+ .elseif (-0x2000 <= (\offset)) && ((\offset) < 0x2000)
+ CFI_EXPRESSION_BREG_2(\n, \b, \offset)
+ .else
+ .error "Unsupported offset"
+ .endif
+END_MACRO
+
// Symbols. On a Mac, we need a leading underscore.
#if !defined(__APPLE__)
#define SYMBOL(name) name
@@ -172,6 +197,16 @@
CFI_RESTORE(REG_VAR(reg))
END_MACRO
+MACRO1(INCREASE_FRAME, frame_adjustment)
+ subl MACRO_LITERAL(RAW_VAR(frame_adjustment)), %esp
+ CFI_ADJUST_CFA_OFFSET((RAW_VAR(frame_adjustment)))
+END_MACRO
+
+MACRO1(DECREASE_FRAME, frame_adjustment)
+ addl MACRO_LITERAL(RAW_VAR(frame_adjustment)), %esp
+ CFI_ADJUST_CFA_OFFSET(-(RAW_VAR(frame_adjustment)))
+END_MACRO
+
#define UNREACHABLE int3
MACRO1(UNIMPLEMENTED,name)
@@ -197,6 +232,13 @@
#endif
END_MACRO
+MACRO2(LOAD_RUNTIME_INSTANCE, reg, got_reg)
+ SETUP_GOT_NOSAVE \got_reg
+ // Load Runtime::instance_ from GOT.
+ movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(reg)
+ movl (REG_VAR(reg)), REG_VAR(reg)
+END_MACRO
+
// Macros to poison (negate) the reference for heap poisoning.
MACRO1(POISON_HEAP_REF, rRef)
#ifdef USE_HEAP_POISONING
@@ -223,8 +265,7 @@
PUSH_ARG edx
PUSH_ARG ecx
// Create space for FPR args.
- subl MACRO_LITERAL(4 * 8), %esp
- CFI_ADJUST_CFA_OFFSET(4 * 8)
+ INCREASE_FRAME 4 * 8
// Save FPRs.
movsd %xmm0, 0(%esp)
movsd %xmm1, 8(%esp)
@@ -246,8 +287,7 @@
movsd 20(%esp), %xmm2
movsd 28(%esp), %xmm3
- addl MACRO_LITERAL(36), %esp // Remove FPRs and method pointer.
- CFI_ADJUST_CFA_OFFSET(-36)
+ DECREASE_FRAME 36 // Remove FPRs and method pointer.
POP_ARG ecx // Restore args
POP_ARG edx
@@ -263,12 +303,12 @@
*/
MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
// Outgoing argument set up
- subl MACRO_LITERAL(12), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(12)
+ INCREASE_FRAME 12 // alignment padding
pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
CFI_ADJUST_CFA_OFFSET(4)
call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*)
UNREACHABLE
+ CFI_ADJUST_CFA_OFFSET(-16) // Reset CFA in case there is more code afterwards.
END_MACRO
#endif // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
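A note on the new CFI_EXPRESSION_BREG macro (an interpretation; the CFI_EXPRESSION_BREG_1/_2 helpers come from the newly included interpreter/cfi_asm_support.h and are assumed here to emit a DWARF DW_CFA_expression record): it describes register `n` as saved at the address `base register b + offset`, choosing the one-byte SLEB128 offset encoding for offsets in [-0x40, 0x40) and the two-byte encoding for [-0x2000, 0x2000). For example, the critical stub below uses
    CFI_EXPRESSION_BREG CFI_REG(edi), CFI_REG(ebx), 56
to tell unwinders that EDI is spilled at [EBX + 56] while EBX, not ESP, anchors the frame.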
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index 086e96f..a1a371c 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -20,9 +20,8 @@
* Jni dlsym lookup stub.
*/
DEFINE_FUNCTION art_jni_dlsym_lookup_stub
- subl LITERAL(8), %esp // align stack
- CFI_ADJUST_CFA_OFFSET(8)
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ INCREASE_FRAME 8 // Align stack.
+ pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
CFI_ADJUST_CFA_OFFSET(4)
// Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
// for @FastNative or @CriticalNative.
@@ -32,17 +31,16 @@
movl (%eax), %eax // ArtMethod* method
testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \
ART_METHOD_ACCESS_FLAGS_OFFSET(%eax)
- jne .Llookup_stub_fast_native
+ jne .Llookup_stub_fast_or_critical_native
call SYMBOL(artFindNativeMethod) // (Thread*)
jmp .Llookup_stub_continue
-.Llookup_stub_fast_native:
+.Llookup_stub_fast_or_critical_native:
call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
.Llookup_stub_continue:
- addl LITERAL(12), %esp // remove argument & padding
- CFI_ADJUST_CFA_OFFSET(-12)
- testl %eax, %eax // check if returned method code is null
- jz .Lno_native_code_found // if null, jump to return to handle
- jmp *%eax // otherwise, tail call to intended method
+ DECREASE_FRAME 12 // Remove argument & padding.
+ testl %eax, %eax // Check if returned method code is null.
+ jz .Lno_native_code_found // If null, jump to return to handle.
+ jmp *%eax // Otherwise, tail call to intended method.
.Lno_native_code_found:
ret
END_FUNCTION art_jni_dlsym_lookup_stub
@@ -53,31 +51,94 @@
testl LITERAL(1), %eax
jnz art_jni_dlsym_lookup_stub
- // We need to create a GenericJNI managed frame above the stack args.
+ // Since the native call args are all on the stack, we can use the managed args
+ // registers as scratch registers. So, EBX, EDX and ECX are available.
- // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method
- // instead of runtime method saved at the bottom. Note that the runtime shall
- // not examine the args here, otherwise we would have to reload them from stack
- // to account for the difference between managed and native ABIs.
- SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
- pushl %eax // Save the hidden arg as method pointer at the bottom of the stack.
- CFI_ADJUST_CFA_OFFSET(4)
+ // Load caller PC.
+ movl (%esp), %ecx
- // Call artCriticalNativeOutArgsSize(method); method is conveniently at the bottom of the stack.
- call SYMBOL(artCriticalNativeOutArgsSize)
+ // Save the caller method from the hidden arg.
+ PUSH_ARG eax
- // Check if we have any stack args other than return PC.
- cmp LITERAL(__SIZEOF_POINTER__), %eax
- jnz .Lcritical_has_stack_args
+ // Call artCriticalNativeFrameSize(method, caller_pc).
+ PUSH_ARG ecx // Pass caller PC.
+ PUSH_ARG eax // Pass method.
+ call SYMBOL(artCriticalNativeFrameSize) // (method, caller_pc)
+ DECREASE_FRAME 8 // Remove args.
- // Without stack args, the frame is fully constructed.
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- leal 1(%esp), %eax // Tag as GenericJNI frame.
- mov %eax, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+ // Restore method register to EBX.
+ POP_ARG ebx
+
+ // Load caller PC to EDX and redefine return PC for CFI.
+ movl (%esp), %edx
+ CFI_REGISTER(%eip, %edx)
+
+ // Reserve space for a SaveRefsAndArgs managed frame, either for the actual runtime
+ // method or for a GenericJNI frame which is similar but has a native method and a tag.
+ INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__
+
+ // Calculate the number of DWORDs to move.
+ movl %eax, %ecx
+ shrl LITERAL(2), %ecx
+ jecxz .Lcritical_skip_copy_args
+
+ // Save EDI, ESI so that we can use them for moving stack args.
+ PUSH edi
+ PUSH esi
+
+ // Move the stack args.
+ leal 2 * __SIZEOF_POINTER__(%esp), %edi
+ leal FRAME_SIZE_SAVE_REFS_AND_ARGS(%edi), %esi
+ rep movsd
+
+ // Restore EDI, ESI.
+ POP esi
+ POP edi
+
+.Lcritical_skip_copy_args:
+ // Calculate the base address of the managed frame.
+ leal (%esp, %eax, 1), %eax
+
+ leal 1(%eax), %ecx // Prepare managed SP tagged for a GenericJNI frame.
+ testl LITERAL(ACCESS_FLAGS_METHOD_IS_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%ebx)
+ jnz .Lcritical_skip_prepare_runtime_method
+
+ // Save the return PC for managed stack walk.
+ // (When coming from a compiled stub, the correct return PC is already there.)
+ movl %edx, FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%eax)
+
+ // Replace the target method with the SaveRefsAndArgs runtime method.
+ LOAD_RUNTIME_INSTANCE ecx, ebx
+ movl RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%ecx), %ebx
+
+ movl %eax, %ecx // Prepare untagged managed SP for the runtime method.
+
+.Lcritical_skip_prepare_runtime_method:
+ // Store the method on the bottom of the managed frame.
+ movl %ebx, (%eax)
+
+ // Move the managed frame address to native callee-save register EBX.
+ movl %eax, %ebx
+
+ // Spill registers for the SaveRefsAndArgs frame above the stack args.
+ movl %edi, 56(%ebx)
+ CFI_EXPRESSION_BREG CFI_REG(edi), CFI_REG(ebx), 56
+ movl %esi, 52(%ebx)
+ CFI_EXPRESSION_BREG CFI_REG(esi), CFI_REG(ebx), 52
+ movl %ebp, 48(%ebx)
+ CFI_EXPRESSION_BREG CFI_REG(ebp), CFI_REG(ebx), 48
+ // Skip managed ABI args EBX, EDX, ECX and FPRs. The runtime shall not examine the
+ // args in the managed frame. (We have already clobbered EBX, EDX, ECX anyway.)
+
+ // Place (maybe tagged) managed SP in Thread::Current()->top_quick_frame.
+ movl %ecx, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+ // Save our return PC in a slot reserved for first FP arg in managed ABI.
+ movl %edx, __SIZEOF_POINTER__(%ebx)
+ CFI_EXPRESSION_BREG CFI_REG(eip), CFI_REG(ebx), __SIZEOF_POINTER__
// Call artFindNativeMethodRunnable()
- subl LITERAL(12), %esp // align stack
- CFI_ADJUST_CFA_OFFSET(12)
+ INCREASE_FRAME 12 // Align stack.
pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
CFI_ADJUST_CFA_OFFSET(4)
call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
@@ -86,87 +147,15 @@
// Check for exception.
test %eax, %eax
- jz 1f
+ jz .Lcritical_deliver_exception
- // Restore frame and do the tail call.
CFI_REMEMBER_STATE
- RESTORE_SAVE_REFS_AND_ARGS_FRAME
- jmp *%eax
- CFI_RESTORE_STATE_AND_DEF_CFA(%esp, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-1:
- DELIVER_PENDING_EXCEPTION_FRAME_READY
+ // Remember our return PC in EDX.
+ movl __SIZEOF_POINTER__(%ebx), %edx
+ CFI_REGISTER(%eip, %edx)
-.Lcritical_has_stack_args:
- // As mentioned above, the runtime shall not examine the args in the managed frame
- // and since all args for the native call are on the stack, we can use the managed
- // args registers as scratch registers. So, EBX, EDX and ECX are available and we
- // do not need to restore xmm0-xmm3 either.
-
- // Restore registers as we're about to move stack args over the current SaveRefsAndArgs frame.
- movl (%esp), %edx // Remember the method in EDX.
- movl 48(%esp), %ebp
- CFI_RESTORE(%ebp)
- movl 52(%esp), %esi
- CFI_RESTORE(%esi)
- movl 56(%esp), %edi
- CFI_RESTORE(%edi)
-
- // Calculate the address of the end of the move destination and redefine CFI to take
- // ownership of the JNI stub frame. EBX is conveniently callee-save in native ABI.
- leal 0(%esp, %eax, 1), %ebx
- CFI_DEF_CFA(%ebx, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-
- // Calculate the number of DWORDs to move.
- shrl LITERAL(2), %eax
- leal -1(%eax), %ecx // Do not move the return PC.
-
- // Load our return PC to EAX.
- movl FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%esp), %eax
-
- // Save EDI, ESI so that we can use them for moving stack args.
- pushl %edi // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
- pushl %esi // ditto
-
- // Mov the stack args.
- leal 2 * __SIZEOF_POINTER__(%esp), %edi
- leal FRAME_SIZE_SAVE_REFS_AND_ARGS(%edi), %esi
- rep movsd
-
- // Save our return PC.
- movl %eax, (%edi)
-
- // Restore EDI, ESI.
- popl %esi // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
- popl %edi // ditto
-
- // Re-create the SaveRefsAndArgs frame above the args.
- movl %edi, 56(%ebx)
- CFI_REL_OFFSET(%edi, 56)
- movl %esi, 52(%ebx)
- CFI_REL_OFFSET(%esi, 52)
- movl %ebp, 48(%ebx)
- CFI_REL_OFFSET(%ebp, 48)
- // Skip managed ABI args EBX, EDX, ECX and FPRs, see above.
- // (We have already clobbered EBX, EDX, ECX anyway).
- movl %edx, (%ebx) // Save method pointer.
-
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- leal 1(%ebx), %eax // Tag as GenericJNI frame.
- movl %eax, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
-
- // Call artFindNativeMethodRunnable()
- subl LITERAL(12), %esp // align stack, no `CFI_ADJUST_CFA_OFFSET`.
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
- addl LITERAL(16), %esp // Pop args, no `CFI_ADJUST_CFA_OFFSET`.
-
- // Check for exception.
- test %eax, %eax
- jz 2f
-
- // Restore the frame. We shall not need the method anymore.
- CFI_REMEMBER_STATE
+ // Restore callee-save registers from the frame. We shall not need the method anymore.
movl 48(%ebx), %ebp
CFI_RESTORE(%ebp)
movl 52(%ebx), %esi
@@ -174,50 +163,39 @@
movl 56(%ebx), %edi
CFI_RESTORE(%edi)
- // Remember our return PC in EDX.
- movl -__SIZEOF_POINTER__(%ebx), %edx
-
// Calculate the number of DWORDs to move.
- leal -__SIZEOF_POINTER__(%ebx), %ecx // Do not move return PC.
+ movl %ebx, %ecx
subl %esp, %ecx
shrl LITERAL(2), %ecx
+ jecxz .Lcritical_skip_copy_args_back
// Save EDI, ESI so that we can use them for moving stack args.
- pushl %edi // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
- pushl %esi // ditto
+ PUSH edi
+ PUSH esi
- // Mov stack args to their original place.
- leal -2 * __SIZEOF_POINTER__(%ebx), %esi
- leal FRAME_SIZE_SAVE_REFS_AND_ARGS - 2 * __SIZEOF_POINTER__(%ebx), %edi
+ // Move stack args to their original place.
+ leal -__SIZEOF_POINTER__(%ebx), %esi
+ leal FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%ebx), %edi
std
rep movsd
cld
- // Store our return PC.
- movl %edx, (%edi)
-
// Restore EDI, ESI.
- popl %esi // No `CFI_ADJUST_CFA_OFFSET`, CFA register is currently EBX, not ESP.
- popl %edi // ditto
+ POP esi
+ POP edi
- // Redefine CFI to release ownership of the JNI stub frame.
- CFI_DEF_CFA(%esp, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-
+.Lcritical_skip_copy_args_back:
// Remove the frame reservation.
- addl LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %esp
- CFI_ADJUST_CFA_OFFSET(-FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)
+ DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__
+
+ // Store our return PC.
+ movl %edx, (%esp)
+ CFI_REL_OFFSET(%eip, 0)
// Do the tail call.
jmp *%eax
- CFI_RESTORE_STATE_AND_DEF_CFA(%ebx, FRAME_SIZE_SAVE_REFS_AND_ARGS)
+ CFI_RESTORE_STATE_AND_DEF_CFA(%esp, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-2:
- // Replicate DELIVER_PENDING_EXCEPTION_FRAME_READY without CFI_ADJUST_CFA_OFFSET,
- // CFA register is currently EBX, not ESP.
-
- // Outgoing argument set up
- subl MACRO_LITERAL(12), %esp // alignment padding
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*)
- UNREACHABLE
+.Lcritical_deliver_exception:
+ DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_jni_dlsym_lookup_critical_stub
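The rewritten stub is easiest to follow as a sketch (an informal summary of the assembly above, not additional patch content):
    // 1. size = artCriticalNativeFrameSize(method, caller_pc)  // native stack args at the call site
    // 2. Reserve a SaveRefsAndArgs-sized managed frame and copy those stack args above it.
    // 3. Hidden arg is a native method   -> tag the managed SP for a GenericJNI frame.
    //    Hidden arg is the caller method -> install the SaveRefsAndArgs runtime method
    //                                       and the caller PC for the managed stack walk.
    // 4. Call artFindNativeMethodRunnable(Thread::Current()).
    // 5. Tear the frame down, move the stack args back, and tail-call the resolved code
    //    (or deliver the pending exception).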
diff --git a/runtime/arch/x86/jni_frame_x86.h b/runtime/arch/x86/jni_frame_x86.h
index e710179..15ccff8 100644
--- a/runtime/arch/x86/jni_frame_x86.h
+++ b/runtime/arch/x86/jni_frame_x86.h
@@ -33,31 +33,47 @@
static constexpr size_t kNativeStackAlignment = 16; // IA-32 cdecl requires 16 byte alignment.
static_assert(kNativeStackAlignment == kStackAlignment);
-// Get the size of "out args" for @CriticalNative method stub.
-// This must match the size of the frame emitted by the JNI compiler at the native call site.
-inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) {
+// Get the size of the arguments for a native call.
+inline size_t GetNativeOutArgsSize(size_t num_args, size_t num_long_or_double_args) {
+ size_t num_arg_words = num_args + num_long_or_double_args;
+ return num_arg_words * static_cast<size_t>(kX86PointerSize);
+}
+
+// Get stack args size for @CriticalNative method calls.
+inline size_t GetCriticalNativeCallArgsSize(const char* shorty, uint32_t shorty_len) {
DCHECK_EQ(shorty_len, strlen(shorty));
- size_t num_long_or_double_args = 0u;
- for (size_t i = 1; i != shorty_len; ++i) {
- if (shorty[i] == 'J' || shorty[i] == 'D') {
- num_long_or_double_args += 1u;
- }
- }
- size_t num_arg_words = shorty_len - 1u + num_long_or_double_args;
+ size_t num_long_or_double_args =
+ std::count_if(shorty + 1, shorty + shorty_len, [](char c) { return c == 'J' || c == 'D'; });
+ return GetNativeOutArgsSize(/*num_args=*/ shorty_len - 1u, num_long_or_double_args);
+}
+
+// Get the frame size for @CriticalNative method stub.
+// This must match the size of the frame emitted by the JNI compiler at the native call site.
+inline size_t GetCriticalNativeStubFrameSize(const char* shorty, uint32_t shorty_len) {
// The size of outgoing arguments.
- size_t size = num_arg_words * static_cast<size_t>(kX86PointerSize);
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
- // Add return address size.
- size += kFramePointerSize;
// We can make a tail call if there are no stack args and the return type is not
// FP type (needs moving from ST0 to MMX0) and we do not need to extend the result.
bool return_type_ok = shorty[0] == 'I' || shorty[0] == 'J' || shorty[0] == 'V';
- if (return_type_ok && size == kFramePointerSize) {
- return kFramePointerSize;
+ if (return_type_ok && size == 0u) {
+ return 0u;
}
+ // Add return address size.
+ size += kFramePointerSize;
+ return RoundUp(size, kNativeStackAlignment);
+}
+
+// Get the frame size for direct call to a @CriticalNative method.
+// This must match the size of the extra frame emitted by the compiler at the native call site.
+inline size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
+
+ // No return PC to save, zero- and sign-extension and FP value moves are handled by the caller.
return RoundUp(size, kNativeStackAlignment);
}
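A small worked example of the x86 helpers above (illustrative only; the shorty is hypothetical): for shorty "IIJ" (int return, one int arg and one long arg), every argument goes on the stack under IA-32 cdecl, so
    GetCriticalNativeCallArgsSize("IIJ", 3u)        // == (2 + 1) * 4 == 12
    GetCriticalNativeDirectCallFrameSize("IIJ", 3u) // == RoundUp(12, 16) == 16
    GetCriticalNativeStubFrameSize("IIJ", 3u)       // == RoundUp(12 + 4 /* return PC */, 16) == 16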
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 4abdf70..b4155e0 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -31,10 +31,7 @@
PUSH ebp
subl MACRO_LITERAL(12), %esp // Grow stack by 3 words.
CFI_ADJUST_CFA_OFFSET(12)
- SETUP_GOT_NOSAVE RAW_VAR(got_reg)
- // Load Runtime::instance_ from GOT.
- movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
- movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ LOAD_RUNTIME_INSTANCE \temp_reg, \got_reg
// Push save all callee-save method.
pushl RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(REG_VAR(temp_reg))
CFI_ADJUST_CFA_OFFSET(4)
@@ -57,10 +54,7 @@
PUSH ebp
subl MACRO_LITERAL(12), %esp // Grow stack by 3 words.
CFI_ADJUST_CFA_OFFSET(12)
- SETUP_GOT_NOSAVE RAW_VAR(got_reg)
- // Load Runtime::instance_ from GOT.
- movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
- movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ LOAD_RUNTIME_INSTANCE \temp_reg, \got_reg
// Push save all callee-save method.
pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
CFI_ADJUST_CFA_OFFSET(4)
@@ -87,10 +81,7 @@
subl MACRO_LITERAL(8), %esp // Grow stack by 2 words.
CFI_ADJUST_CFA_OFFSET(8)
- SETUP_GOT_NOSAVE RAW_VAR(got_reg)
- // Load Runtime::instance_ from GOT.
- movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
- movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ LOAD_RUNTIME_INSTANCE \temp_reg, \got_reg
// Push save all callee-save method.
pushl RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(REG_VAR(temp_reg))
CFI_ADJUST_CFA_OFFSET(4)
@@ -122,10 +113,7 @@
MACRO2(SETUP_SAVE_REFS_AND_ARGS_FRAME, got_reg, temp_reg)
SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
- SETUP_GOT_NOSAVE RAW_VAR(got_reg)
- // Load Runtime::instance_ from GOT.
- movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
- movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ LOAD_RUNTIME_INSTANCE \temp_reg, \got_reg
// Push save all callee-save method.
pushl RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(REG_VAR(temp_reg))
CFI_ADJUST_CFA_OFFSET(4)
@@ -196,10 +184,7 @@
movsd %xmm6, 60(%esp)
movsd %xmm7, 68(%esp)
- SETUP_GOT_NOSAVE RAW_VAR(got_reg)
- // Load Runtime::instance_ from GOT.
- movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
- movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+ LOAD_RUNTIME_INSTANCE \temp_reg, \got_reg
// Push save everything callee-save method.
pushl \runtime_method_offset(REG_VAR(temp_reg))
CFI_ADJUST_CFA_OFFSET(4)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 6a60a98..be9f59a 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -18,6 +18,7 @@
#define ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
#include "asm_support_x86_64.h"
+#include "interpreter/cfi_asm_support.h"
// Regular gas(1) & current clang/llvm assembler support named macro parameters.
#define MACRO0(macro_name) .macro macro_name
@@ -76,6 +77,7 @@
#define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
#define CFI_RESTORE(reg) .cfi_restore reg
#define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+ #define CFI_REGISTER(orig_reg, current_reg) .cfi_register orig_reg, current_reg
#define CFI_REMEMBER_STATE .cfi_remember_state
// The spec is not clear whether the CFA is part of the saved state and tools
// differ in the behaviour, so explicitly set the CFA to avoid any ambiguity.
@@ -92,11 +94,43 @@
#define CFI_DEF_CFA_REGISTER(reg)
#define CFI_RESTORE(reg)
#define CFI_REL_OFFSET(reg,size)
+ #define CFI_REGISTER(orig_reg, current_reg)
#define CFI_REMEMBER_STATE
#define CFI_RESTORE_STATE_AND_DEF_CFA(off)
#define CFI_RESTORE_STATE
#endif
+// The register numbers are a bit mixed up for x86-64.
+#define CFI_REG_rax 0
+#define CFI_REG_rcx 2
+#define CFI_REG_rdx 1
+#define CFI_REG_rbx 3
+#define CFI_REG_rsp 7
+#define CFI_REG_rbp 6
+#define CFI_REG_rsi 4
+#define CFI_REG_rdi 5
+#define CFI_REG_r8 8
+#define CFI_REG_r9 9
+#define CFI_REG_r10 10
+#define CFI_REG_r11 11
+#define CFI_REG_r12 12
+#define CFI_REG_r13 13
+#define CFI_REG_r14 14
+#define CFI_REG_r15 15
+#define CFI_REG_rip 16
+
+#define CFI_REG(reg) CFI_REG_##reg
+
+MACRO3(CFI_EXPRESSION_BREG, n, b, offset)
+ .if (-0x40 <= (\offset)) && ((\offset) < 0x40)
+ CFI_EXPRESSION_BREG_1(\n, \b, \offset)
+ .elseif (-0x2000 <= (\offset)) && ((\offset) < 0x2000)
+ CFI_EXPRESSION_BREG_2(\n, \b, \offset)
+ .else
+ .error "Unsupported offset"
+ .endif
+END_MACRO
+
// Symbols.
#if !defined(__APPLE__)
#define SYMBOL(name) name
@@ -178,6 +212,16 @@
CFI_RESTORE(REG_VAR(reg))
END_MACRO
+MACRO1(INCREASE_FRAME, frame_adjustment)
+ subq MACRO_LITERAL(RAW_VAR(frame_adjustment)), %rsp
+ CFI_ADJUST_CFA_OFFSET((RAW_VAR(frame_adjustment)))
+END_MACRO
+
+MACRO1(DECREASE_FRAME, frame_adjustment)
+ addq MACRO_LITERAL(RAW_VAR(frame_adjustment)), %rsp
+ CFI_ADJUST_CFA_OFFSET(-(RAW_VAR(frame_adjustment)))
+END_MACRO
+
MACRO1(UNIMPLEMENTED,name)
FUNCTION_TYPE(SYMBOL(\name))
ASM_HIDDEN VAR(name)
@@ -199,6 +243,11 @@
int3
END_MACRO
+MACRO1(LOAD_RUNTIME_INSTANCE, reg)
+ movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), REG_VAR(reg)
+ movq (REG_VAR(reg)), REG_VAR(reg)
+END_MACRO
+
// Macros to poison (negate) the reference for heap poisoning.
MACRO1(POISON_HEAP_REF, rRef)
#ifdef USE_HEAP_POISONING
@@ -223,8 +272,7 @@
int3
#else
// R10 := Runtime::Current()
- movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
- movq (%r10), %r10
+ LOAD_RUNTIME_INSTANCE r10
// Save callee and GPR args, mixed together to agree with core spills bitmap.
PUSH r15 // Callee save.
PUSH r14 // Callee save.
@@ -233,8 +281,7 @@
PUSH rbp // Callee save.
PUSH rbx // Callee save.
// Create space for FPR args, plus space for ArtMethod*.
- subq LITERAL(8 + 4 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
+ INCREASE_FRAME 8 + 4 * 8
// Save FPRs.
movq %xmm12, 8(%rsp)
movq %xmm13, 16(%rsp)
@@ -260,8 +307,7 @@
movq 16(%rsp), %xmm13
movq 24(%rsp), %xmm14
movq 32(%rsp), %xmm15
- addq LITERAL(8 + 4*8), %rsp
- CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
+ DECREASE_FRAME 8 + 4*8
// TODO: optimize by not restoring callee-saves restored by the ABI
POP rbx
POP rbp
@@ -289,8 +335,7 @@
PUSH_ARG rdx // Quick arg 2.
PUSH_ARG rcx // Quick arg 3.
// Create space for FPR args and create 2 slots for ArtMethod*.
- subq MACRO_LITERAL(16 + 12 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
+ INCREASE_FRAME 16 + 12 * 8
// Save FPRs.
movq %xmm0, 16(%rsp)
movq %xmm1, 24(%rsp)
@@ -326,8 +371,7 @@
movq 88(%rsp), %xmm13
movq 96(%rsp), %xmm14
movq 104(%rsp), %xmm15
- addq MACRO_LITERAL(80 + 4 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
+ DECREASE_FRAME 80 + 4 * 8
// Restore callee and GPR args, mixed together to agree with core spills bitmap.
POP_ARG rcx
POP_ARG rdx
@@ -352,8 +396,7 @@
int3
#else
// R10 := Runtime::Current()
- movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
- movq (%r10), %r10
+ LOAD_RUNTIME_INSTANCE r10
// Save callee save registers to agree with core spills bitmap.
PUSH r15 // Callee save.
PUSH r14 // Callee save.
@@ -362,8 +405,7 @@
PUSH rbp // Callee save.
PUSH rbx // Callee save.
// Create space for FPR args, plus space for ArtMethod*.
- subq MACRO_LITERAL(4 * 8 + 8), %rsp
- CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
+ INCREASE_FRAME 4 * 8 + 8
// Save FPRs.
movq %xmm12, 8(%rsp)
movq %xmm13, 16(%rsp)
@@ -386,8 +428,7 @@
MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
// Create space for ART FP callee-saved registers
- subq MACRO_LITERAL(4 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(4 * 8)
+ INCREASE_FRAME 4 * 8
movq %xmm12, 0(%rsp)
movq %xmm13, 8(%rsp)
movq %xmm14, 16(%rsp)
@@ -400,8 +441,7 @@
movq 8(%rsp), %xmm13
movq 16(%rsp), %xmm14
movq 24(%rsp), %xmm15
- addq MACRO_LITERAL(4 * 8), %rsp
- CFI_ADJUST_CFA_OFFSET(- 4 * 8)
+ DECREASE_FRAME 4 * 8
END_MACRO
/*
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index 5c80589..a1b8c7b 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -28,8 +28,7 @@
PUSH_ARG rdx // Arg.
PUSH_ARG rcx // Arg.
// Create space for FPR args, plus padding for alignment
- subq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(72)
+ INCREASE_FRAME 72
// Save FPRs.
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
@@ -48,10 +47,10 @@
movq (%rax), %rax // ArtMethod* method
testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \
ART_METHOD_ACCESS_FLAGS_OFFSET(%rax)
- jne .Llookup_stub_fast_native
+ jne .Llookup_stub_fast_or_critical_native
call SYMBOL(artFindNativeMethod) // (Thread*)
jmp .Llookup_stub_continue
-.Llookup_stub_fast_native:
+.Llookup_stub_fast_or_critical_native:
call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
.Llookup_stub_continue:
// restore arguments
@@ -63,8 +62,7 @@
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
- addq LITERAL(72), %rsp
- CFI_ADJUST_CFA_OFFSET(-72)
+ DECREASE_FRAME 72
POP_ARG rcx // Arg.
POP_ARG rdx // Arg.
POP_ARG rsi // Arg.
@@ -84,29 +82,16 @@
testq LITERAL(1), %rax
jnz art_jni_dlsym_lookup_stub
- // We need to create a GenericJNI managed frame above the stack args.
-
- // GenericJNI frame is similar to SaveRegsAndArgs frame with the native method
- // instead of runtime method saved at the bottom.
-
- // As we always have "stack args" on x86-64 (due to xmm12-xmm15 being callee-save
- // in managed ABI but caller-save in native ABI), do not create a proper frame yet
- // as we do on other architectures where it's useful for no stack args case.
-
- // Reserve space for the frame (return PC is on stack).
- subq MACRO_LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %rsp
- CFI_ADJUST_CFA_OFFSET(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__)
-
- // Save GPR args.
+ // Save GPR args and method.
PUSH_ARG r9
PUSH_ARG r8
PUSH_ARG rdi
PUSH_ARG rsi
PUSH_ARG rdx
PUSH_ARG rcx
+ PUSH_ARG rax
// Create space for FPR args.
- subq LITERAL(64), %rsp
- CFI_ADJUST_CFA_OFFSET(64)
+ INCREASE_FRAME 8 * 8
// Save FPRs.
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
@@ -116,119 +101,15 @@
movq %xmm5, 40(%rsp)
movq %xmm6, 48(%rsp)
movq %xmm7, 56(%rsp)
+ // Note: It's the caller's responsibility to preserve xmm12-xmm15, as the tail call
+ // into native code may clobber them.
- // Add alignment padding.
- subq MACRO_LITERAL(__SIZEOF_POINTER__), %rsp
- CFI_ADJUST_CFA_OFFSET(__SIZEOF_POINTER__)
- // Save hidden arg.
- PUSH_ARG rax
+ // Call artCriticalNativeFrameSize(method, caller_pc).
+ movq %rax, %rdi // Pass the method from hidden arg.
+ movq 120(%rsp), %rsi // Pass caller PC.
+ call SYMBOL(artCriticalNativeFrameSize)
- // Call artCriticalNativeOutArgsSize(method).
- movq %rax, %rdi // Pass the method from hidden arg.
- call SYMBOL(artCriticalNativeOutArgsSize)
-
- // Calculate the address of the end of the move destination and redefine CFI to take
- // ownership of the JNI stub frame.
- leaq 16 * __SIZEOF_POINTER__(%rsp, %rax, 1), %r10 // 16 QWORDs of registers saved above.
- CFI_DEF_CFA(%r10, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-
- // Calculate the number of QWORDs to move.
- shrq LITERAL(3), %rax
- leaq -1(%rax), %rcx // Do not move the return PC.
-
- // Load our return PC to EAX.
- movq FRAME_SIZE_SAVE_REFS_AND_ARGS + (16 - 1) * __SIZEOF_POINTER__(%rsp), %rax
-
- // Mov the stack args.
- leaq 16 * __SIZEOF_POINTER__(%rsp), %rdi
- leaq FRAME_SIZE_SAVE_REFS_AND_ARGS(%rdi), %rsi
- rep movsq
-
- // Save our return PC.
- movq %rax, (%rdi)
-
- // Pop the hidden arg and alignment padding.
- popq %rax // No `.cfi_adjust_cfa_offset`, CFA register is currently R10, not RSP.
- addq MACRO_LITERAL(__SIZEOF_POINTER__), %rsp // ditto
-
- // Fill the SaveRefsAndArgs frame above the args, without actual args. Note that
- // the runtime shall not examine the args here, otherwise we would have to move them in
- // registers and stack to account for the difference between managed and native ABIs.
- SAVE_REG_BASE r10, r15, 192
- SAVE_REG_BASE r10, r14, 184
- SAVE_REG_BASE r10, r13, 176
- SAVE_REG_BASE r10, r12, 168
- // Skip args r9, r8, rsi.
- SAVE_REG_BASE r10, rbp, 136
- SAVE_REG_BASE r10, rbx, 128
- // Skip args rdx, rcx.
- // Skip args xmm0-xmm7.
- // Copy managed callee-saves xmm12-xmm15 from out args to the managed frame as they
- // may theoretically store variables or unwinding data. (The compiled stub preserves
- // them but the artCriticalNativeOutArgsSize() call above may clobber them.)
- movq -5 * __SIZEOF_POINTER__(%r10), %xmm12
- movq -4 * __SIZEOF_POINTER__(%r10), %xmm13
- movq -3 * __SIZEOF_POINTER__(%r10), %xmm14
- movq -2 * __SIZEOF_POINTER__(%r10), %xmm15
- movq %xmm12, 80(%r10)
- movq %xmm13, 88(%r10)
- movq %xmm14, 96(%r10)
- movq %xmm15, 104(%r10)
- // Save the hidden arg as method pointer at the bottom of the stack.
- movq %rax, (%r10)
-
- // Move the frame register to a callee-save register.
- movq %r10, %rbp
- CFI_DEF_CFA_REGISTER(%rbp)
-
- // Place tagged managed sp in Thread::Current()->top_quick_frame.
- leaq 1(%rbp), %rax // Tag as GenericJNI frame.
- movq %rax, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
-
- // Call artFindNativeMethodRunnable()
- movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
- call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
-
- // Check for exception.
- test %rax, %rax
- jz 2f
-
- // Restore the frame. We shall not need the method anymore.
- .cfi_remember_state
- movq %rbp, %r10
- CFI_DEF_CFA_REGISTER(%r10)
- // Skip args xmm0-xmm7 and managed callee-saves xmm12-xmm15 (not needed for native call).
- // Skip args rdx, rcx.
- RESTORE_REG_BASE r10, rbx, 128
- RESTORE_REG_BASE r10, rbp, 136
- // Skip args r9, r8, rsi.
- RESTORE_REG_BASE r10, r12, 168
- RESTORE_REG_BASE r10, r13, 176
- RESTORE_REG_BASE r10, r14, 184
- RESTORE_REG_BASE r10, r15, 192
-
- // Remember our return PC in R11.
- movq -__SIZEOF_POINTER__(%r10), %r11
-
- // Calculate the number of DWORDs to move.
- leaq -(1 + 14) * __SIZEOF_POINTER__(%r10), %rcx // Do not move return PC, 14 arg regs saved.
- subq %rsp, %rcx
- shrq LITERAL(3), %rcx
-
- // Mov stack args to their original place.
- leaq -2 * __SIZEOF_POINTER__(%r10), %rsi
- leaq FRAME_SIZE_SAVE_REFS_AND_ARGS - 2 * __SIZEOF_POINTER__(%r10), %rdi
- std
- rep movsq
- cld
-
- // Store our return PC.
- movq %r11, (%rdi)
-
- // Redefine CFI to release ownership of the JNI stub frame.
- CFI_DEF_CFA(%rsp, FRAME_SIZE_SAVE_REFS_AND_ARGS + 14 * __SIZEOF_POINTER__)
-
- // Restore args.
+ // Restore registers.
movq 0(%rsp), %xmm0
movq 8(%rsp), %xmm1
movq 16(%rsp), %xmm2
@@ -237,8 +118,8 @@
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
- addq LITERAL(64), %rsp
- CFI_ADJUST_CFA_OFFSET(-64)
+ DECREASE_FRAME 8 * 8
+ POP_ARG r10 // Restore method to R10.
POP_ARG rcx
POP_ARG rdx
POP_ARG rsi
@@ -246,17 +127,185 @@
POP_ARG r8
POP_ARG r9
+ // Load caller PC to R11 and redefine return PC for CFI.
+ movq (%rsp), %r11
+ CFI_REGISTER(%rip, %r11)
+
+ // Reserve space for a SaveRefsAndArgs managed frame, either for the actual runtime
+ // method or for a GenericJNI frame which is similar but has a native method and a tag.
+ INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__
+
+ // Calculate the number of QWORDs to move.
+ shrq LITERAL(3), %rax
+ jz .Lcritical_skip_copy_args
+
+ // Save RDI, RSI, RCX so that we can use them for moving stack args.
+ PUSH_ARG rdi
+ PUSH_ARG rsi
+ PUSH_ARG rcx
+
+ // Move the stack args.
+ movq %rax, %rcx
+ leaq 3 * __SIZEOF_POINTER__(%rsp), %rdi
+ leaq FRAME_SIZE_SAVE_REFS_AND_ARGS(%rdi), %rsi
+ rep movsq
+
+ // Restore RDI, RSI, RCX.
+ POP_ARG rcx
+ POP_ARG rsi
+ POP_ARG rdi
+
+.Lcritical_skip_copy_args:
+ // Calculate the base address of the managed frame.
+ leaq (%rsp, %rax, 8), %rax
+
+ // Spill registers for the SaveRefsAndArgs frame above the stack args.
+ // Note that the runtime shall not examine the args here, otherwise we would have to
+ // move them in registers and stack to account for the difference between managed and
+ // native ABIs. Do not update CFI while we hold the frame address in RAX and the values
+ // in registers are unchanged.
+ movq %r15, 192(%rax)
+ movq %r14, 184(%rax)
+ movq %r13, 176(%rax)
+ movq %r12, 168(%rax)
+ movq %r9, 160(%rax)
+ movq %r8, 152(%rax)
+ movq %rsi, 144(%rax)
+ movq %rbp, 136(%rax)
+ movq %rbx, 128(%rax)
+ movq %rdx, 120(%rax)
+ movq %rcx, 112(%rax)
+ movq %xmm0, 16(%rax)
+ movq %xmm1, 24(%rax)
+ movq %xmm2, 32(%rax)
+ movq %xmm3, 40(%rax)
+ movq %xmm4, 48(%rax)
+ movq %xmm5, 56(%rax)
+ movq %xmm6, 64(%rax)
+ movq %xmm7, 72(%rax)
+ // Skip managed ABI callee-saves xmm12-xmm15.
+
+ // Move the managed frame address to native callee-save register RBP and update CFI.
+ movq %rax, %rbp
+ CFI_EXPRESSION_BREG CFI_REG(r15), CFI_REG(rbp), 192
+ CFI_EXPRESSION_BREG CFI_REG(r14), CFI_REG(rbp), 184
+ CFI_EXPRESSION_BREG CFI_REG(r13), CFI_REG(rbp), 176
+ CFI_EXPRESSION_BREG CFI_REG(r12), CFI_REG(rbp), 168
+ // Skip args r9, r8, rsi.
+ CFI_EXPRESSION_BREG CFI_REG(rbp), CFI_REG(rbp), 136
+ CFI_EXPRESSION_BREG CFI_REG(rbx), CFI_REG(rbp), 128
+ // Skip args rdx, rcx.
+ // Skip args xmm0-xmm7.
+
+ leaq 1(%rbp), %rax // Prepare managed SP tagged for a GenericJNI frame.
+ testl LITERAL(ACCESS_FLAGS_METHOD_IS_NATIVE), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
+ jnz .Lcritical_skip_prepare_runtime_method
+
+ // Save the return PC for managed stack walk.
+ // (When coming from a compiled stub, the correct return PC is already there.)
+ movq %r11, FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%rbp)
+
+ // Replace the target method with the SaveRefsAndArgs runtime method.
+ LOAD_RUNTIME_INSTANCE r10
+ movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
+
+ movq %rbp, %rax // Prepare untagged managed SP for the runtime method.
+
+.Lcritical_skip_prepare_runtime_method:
+ // Store the method on the bottom of the managed frame.
+ movq %r10, (%rbp)
+
+ // Place (maybe tagged) managed SP in Thread::Current()->top_quick_frame.
+ movq %rax, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+
+ // Save our return PC in the padding.
+ movq %r11, __SIZEOF_POINTER__(%rbp)
+ CFI_EXPRESSION_BREG CFI_REG(rip), CFI_REG(rbp), __SIZEOF_POINTER__
+
+ // Preserve the native arg register RDI in callee-save register RBX which was saved above.
+ movq %rdi, %rbx
+
+ // Call artFindNativeMethodRunnable()
+ movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
+ call SYMBOL(artFindNativeMethodRunnable) // (Thread*)
+
+ // Check for exception.
+ test %rax, %rax
+ jz .Lcritical_deliver_exception
+
+ CFI_REMEMBER_STATE
+
+ // Restore the native arg register RDI.
+ movq %rbx, %rdi
+
+ // Remember our return PC in R11.
+ movq __SIZEOF_POINTER__(%rbp), %r11
+ CFI_REGISTER(%rip, %r11)
+
+ // Remember the frame base address in r10 but do not redefine CFI.
+ movq %rbp, %r10
+
+ // Restore the frame. We shall not need the method anymore.
+ movq 16(%rbp), %xmm0
+ movq 24(%rbp), %xmm1
+ movq 32(%rbp), %xmm2
+ movq 40(%rbp), %xmm3
+ movq 48(%rbp), %xmm4
+ movq 56(%rbp), %xmm5
+ movq 64(%rbp), %xmm6
+ movq 72(%rbp), %xmm7
+ // Skip managed callee-saves xmm12-xmm15.
+ movq 112(%rbp), %rcx
+ movq 120(%rbp), %rdx
+ RESTORE_REG_BASE rbp, rbx, 128
+ // Delay restoring RBP as it's the managed frame base.
+ movq 144(%rbp), %rsi
+ movq 152(%rbp), %r8
+ movq 160(%rbp), %r9
+ RESTORE_REG_BASE rbp, r12, 168
+ RESTORE_REG_BASE rbp, r13, 176
+ RESTORE_REG_BASE rbp, r14, 184
+ RESTORE_REG_BASE rbp, r15, 192
+ // Restore RBP last.
+ RESTORE_REG_BASE rbp, rbp, 136
+
+ cmp %r10, %rsp
+ je .Lcritical_skip_copy_args_back
+
+ // Save RDI, RSI, RCX so that we can use them for moving stack args.
+ PUSH_ARG rdi
+ PUSH_ARG rsi
+ PUSH_ARG rcx
+
+ // Calculate the number of QWORDs to move.
+ leaq -3 * __SIZEOF_POINTER__(%r10), %rcx
+ subq %rsp, %rcx
+ shrq LITERAL(3), %rcx
+
+ // Move the stack args.
+ leaq -__SIZEOF_POINTER__(%r10), %rsi
+ leaq FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__(%r10), %rdi
+ std
+ rep movsq
+ cld
+
+ // Restore RDI, RSI, RCX.
+ POP_ARG rcx
+ POP_ARG rsi
+ POP_ARG rdi
+
+.Lcritical_skip_copy_args_back:
// Remove the frame reservation.
- addq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__), %rsp
- CFI_ADJUST_CFA_OFFSET(-(FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__))
+ DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS - __SIZEOF_POINTER__
+
+ // Store our return PC.
+ movq %r11, (%rsp)
+ CFI_REL_OFFSET(%rip, 0)
// Do the tail call.
jmp *%rax
CFI_RESTORE_STATE_AND_DEF_CFA(%rbp, FRAME_SIZE_SAVE_REFS_AND_ARGS)
-2:
- // Drop the args from the stack (the RAX and padding was already removed).
- addq LITERAL(14 * __SIZEOF_POINTER__), %rsp
-
+.Lcritical_deliver_exception:
DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_jni_dlsym_lookup_critical_stub
diff --git a/runtime/arch/x86_64/jni_frame_x86_64.h b/runtime/arch/x86_64/jni_frame_x86_64.h
index 65736fe..959e266 100644
--- a/runtime/arch/x86_64/jni_frame_x86_64.h
+++ b/runtime/arch/x86_64/jni_frame_x86_64.h
@@ -46,28 +46,34 @@
// -- JNI calling convention only (Managed excludes RDI, so it's actually 5).
constexpr size_t kMaxIntLikeRegisterArguments = 6u;
-// Get the size of "out args" for @CriticalNative method stub.
-// This must match the size of the frame emitted by the JNI compiler at the native call site.
-inline size_t GetCriticalNativeOutArgsSize(const char* shorty, uint32_t shorty_len) {
- DCHECK_EQ(shorty_len, strlen(shorty));
-
- size_t num_fp_args = 0u;
- for (size_t i = 1; i != shorty_len; ++i) {
- if (shorty[i] == 'F' || shorty[i] == 'D') {
- num_fp_args += 1u;
- }
- }
- size_t num_non_fp_args = shorty_len - 1u - num_fp_args;
-
+// Get the size of the arguments for a native call.
+inline size_t GetNativeOutArgsSize(size_t num_fp_args, size_t num_non_fp_args) {
// Account for FP arguments passed through Xmm0..Xmm7.
size_t num_stack_fp_args =
num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
// Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
size_t num_stack_non_fp_args =
num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
- // The size of outgoing arguments.
static_assert(kFramePointerSize == kMmxSpillSize);
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ return (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+}
+
+// Get stack args size for @CriticalNative method calls.
+inline size_t GetCriticalNativeCallArgsSize(const char* shorty, uint32_t shorty_len) {
+ DCHECK_EQ(shorty_len, strlen(shorty));
+
+ size_t num_fp_args =
+ std::count_if(shorty + 1, shorty + shorty_len, [](char c) { return c == 'F' || c == 'D'; });
+ size_t num_non_fp_args = shorty_len - 1u - num_fp_args;
+
+ return GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
+}
+
+// Get the frame size for @CriticalNative method stub.
+// This must match the size of the frame emitted by the JNI compiler at the native call site.
+inline size_t GetCriticalNativeStubFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
// We always need to spill xmm12-xmm15 as they are managed callee-saves
// but not native callee-saves.
@@ -78,6 +84,16 @@
return RoundUp(size, kNativeStackAlignment);
}
+// Get the frame size for direct call to a @CriticalNative method.
+// This must match the size of the extra frame emitted by the compiler at the native call site.
+inline size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len) {
+ // The size of outgoing arguments.
+ size_t size = GetCriticalNativeCallArgsSize(shorty, shorty_len);
+
+ // No return PC to save, zero- and sign-extension are handled by the caller.
+ return RoundUp(size, kNativeStackAlignment);
+}
+
} // namespace x86_64
} // namespace art
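One consequence worth noting (inferred from the code above, illustrative only): for a no-arg @CriticalNative method with shorty "V",
    GetCriticalNativeCallArgsSize("V", 1u)        // == 0
    GetCriticalNativeDirectCallFrameSize("V", 1u) // == 0, no extra frame at the direct call site
    GetCriticalNativeStubFrameSize("V", 1u)       // != 0, xmm12-xmm15 are always spilled
so the direct call needs no extra frame at all, while the stub frame stays non-zero because xmm12-xmm15 (managed callee-saves that are caller-save in the native ABI) must always be spilled.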
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 6a19bbb..e25045d 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -40,8 +40,7 @@
#else
SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
// R10 := Runtime::Current()
- movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
- movq (%r10), %r10
+ LOAD_RUNTIME_INSTANCE r10
// R10 := ArtMethod* for ref and args callee save frame method.
movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
// Store ArtMethod* to bottom of stack.
@@ -88,8 +87,7 @@
subq MACRO_LITERAL(8 + 16 * 8), %rsp
CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
// R10 := Runtime::Current()
- movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
- movq (%r10), %r10
+ LOAD_RUNTIME_INSTANCE r10
// Save FPRs.
movq %xmm0, 8(%rsp)
movq %xmm1, 16(%rsp)
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index a2a45ce..2db2faa 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -392,24 +392,6 @@
self->PopManagedStackFragment(fragment);
}
-const void* ArtMethod::RegisterNative(const void* native_method) {
- CHECK(IsNative()) << PrettyMethod();
- CHECK(native_method != nullptr) << PrettyMethod();
- void* new_native_method = nullptr;
- Runtime::Current()->GetRuntimeCallbacks()->RegisterNativeMethod(this,
- native_method,
- /*out*/&new_native_method);
- SetEntryPointFromJni(new_native_method);
- return new_native_method;
-}
-
-void ArtMethod::UnregisterNative() {
- CHECK(IsNative()) << PrettyMethod();
- // restore stub to lookup native pointer via dlsym
- SetEntryPointFromJni(
- IsCriticalNative() ? GetJniDlsymLookupCriticalStub() : GetJniDlsymLookupStub());
-}
-
bool ArtMethod::IsOverridableByDefaultMethod() {
return GetDeclaringClass()->IsInterface();
}
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 70d8d15..16b4648 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -489,13 +489,6 @@
ClearFastInterpreterToInterpreterInvokeFlag();
}
- // Registers the native method and returns the new entry point. NB The returned entry point might
- // be different from the native_method argument if some MethodCallback modifies it.
- const void* RegisterNative(const void* native_method)
- REQUIRES_SHARED(Locks::mutator_lock_) WARN_UNUSED;
-
- void UnregisterNative() REQUIRES_SHARED(Locks::mutator_lock_);
-
static constexpr MemberOffset DataOffset(PointerSize pointer_size) {
return MemberOffset(PtrSizedFieldsOffset(pointer_size) + OFFSETOF_MEMBER(
PtrSizedFields, data_) / sizeof(void*) * static_cast<size_t>(pointer_size));
@@ -587,7 +580,9 @@
void SetEntryPointFromJni(const void* entrypoint)
REQUIRES_SHARED(Locks::mutator_lock_) {
- DCHECK(IsNative());
+ // The resolution method also has a JNI entrypoint for direct calls from
+ // compiled code to the JNI dlsym lookup stub for @CriticalNative.
+ DCHECK(IsNative() || IsRuntimeMethod());
SetEntryPointFromJniPtrSize(entrypoint, kRuntimePointerSize);
}
@@ -837,6 +832,8 @@
// Depending on the method type, the data is
// - native method: pointer to the JNI function registered to this method
// or a function to resolve the JNI function,
+ // - resolution method: pointer to a function to resolve the method and
+ // the JNI function for @CriticalNative.
// - conflict method: ImtConflictTable,
// - abstract/interface method: the single-implementation if any,
// - proxy method: the original interface method or constructor,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f24c5f4..f7fe27d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -322,7 +322,7 @@
vm->DeleteWeakGlobalRef(self, classes_[i]);
if (klass != nullptr) {
mirror::Class::SetStatus(klass, ClassStatus::kVisiblyInitialized, self);
- class_linker_->FixupStaticTrampolines(klass.Get());
+ class_linker_->FixupStaticTrampolines(self, klass.Get());
}
}
num_classes_ = 0u;
@@ -422,14 +422,14 @@
// Thanks to the x86 memory model, we do not need any memory fences and
// we can immediately mark the class as visibly initialized.
mirror::Class::SetStatus(klass, ClassStatus::kVisiblyInitialized, self);
- FixupStaticTrampolines(klass.Get());
+ FixupStaticTrampolines(self, klass.Get());
return nullptr;
}
if (Runtime::Current()->IsActiveTransaction()) {
// Transactions are single-threaded, so we can mark the class as visibly initialized.
// (Otherwise we'd need to track the callback's entry in the transaction for rollback.)
mirror::Class::SetStatus(klass, ClassStatus::kVisiblyInitialized, self);
- FixupStaticTrampolines(klass.Get());
+ FixupStaticTrampolines(self, klass.Get());
return nullptr;
}
mirror::Class::SetStatus(klass, ClassStatus::kInitialized, self);
@@ -449,6 +449,65 @@
}
}
+const void* ClassLinker::RegisterNative(
+ Thread* self, ArtMethod* method, const void* native_method) {
+ CHECK(method->IsNative()) << method->PrettyMethod();
+ CHECK(native_method != nullptr) << method->PrettyMethod();
+ void* new_native_method = nullptr;
+ Runtime* runtime = Runtime::Current();
+ runtime->GetRuntimeCallbacks()->RegisterNativeMethod(method,
+ native_method,
+ /*out*/&new_native_method);
+ if (method->IsCriticalNative()) {
+ MutexLock lock(self, critical_native_code_with_clinit_check_lock_);
+ // Remove old registered method if any.
+ auto it = critical_native_code_with_clinit_check_.find(method);
+ if (it != critical_native_code_with_clinit_check_.end()) {
+ critical_native_code_with_clinit_check_.erase(it);
+ }
+ // To ensure correct memory visibility, we need the class to be visibly
+ // initialized before we can set the JNI entrypoint.
+ if (method->GetDeclaringClass()->IsVisiblyInitialized()) {
+ method->SetEntryPointFromJni(new_native_method);
+ } else {
+ critical_native_code_with_clinit_check_.emplace(method, new_native_method);
+ }
+ } else {
+ method->SetEntryPointFromJni(new_native_method);
+ }
+ return new_native_method;
+}
+
+void ClassLinker::UnregisterNative(Thread* self, ArtMethod* method) {
+ CHECK(method->IsNative()) << method->PrettyMethod();
+ // Restore stub to lookup native pointer via dlsym.
+ if (method->IsCriticalNative()) {
+ MutexLock lock(self, critical_native_code_with_clinit_check_lock_);
+ auto it = critical_native_code_with_clinit_check_.find(method);
+ if (it != critical_native_code_with_clinit_check_.end()) {
+ critical_native_code_with_clinit_check_.erase(it);
+ }
+ method->SetEntryPointFromJni(GetJniDlsymLookupCriticalStub());
+ } else {
+ method->SetEntryPointFromJni(GetJniDlsymLookupStub());
+ }
+}
+
+const void* ClassLinker::GetRegisteredNative(Thread* self, ArtMethod* method) {
+ if (method->IsCriticalNative()) {
+ MutexLock lock(self, critical_native_code_with_clinit_check_lock_);
+ auto it = critical_native_code_with_clinit_check_.find(method);
+ if (it != critical_native_code_with_clinit_check_.end()) {
+ return it->second;
+ }
+ const void* native_code = method->GetEntryPointFromJni();
+ return IsJniDlsymLookupCriticalStub(native_code) ? nullptr : native_code;
+ } else {
+ const void* native_code = method->GetEntryPointFromJni();
+ return IsJniDlsymLookupStub(native_code) ? nullptr : native_code;
+ }
+}
+
void ClassLinker::ThrowEarlierClassFailure(ObjPtr<mirror::Class> c,
bool wrap_in_no_class_def,
bool log) {
@@ -638,6 +697,8 @@
image_pointer_size_(kRuntimePointerSize),
visibly_initialized_callback_lock_("visibly initialized callback lock"),
visibly_initialized_callback_(nullptr),
+ critical_native_code_with_clinit_check_lock_("critical native code with clinit check lock"),
+ critical_native_code_with_clinit_check_(),
cha_(Runtime::Current()->IsAotCompiler() ? nullptr : new ClassHierarchyAnalysis()) {
// For CHA disabled during Aot, see b/34193647.
@@ -2498,6 +2559,17 @@
CHAOnDeleteUpdateClassVisitor visitor(data.allocator);
data.class_table->Visit<CHAOnDeleteUpdateClassVisitor, kWithoutReadBarrier>(visitor);
}
+ {
+ MutexLock lock(self, critical_native_code_with_clinit_check_lock_);
+ auto end = critical_native_code_with_clinit_check_.end();
+ for (auto it = critical_native_code_with_clinit_check_.begin(); it != end; ) {
+ if (data.allocator->ContainsUnsafe(it->first)) {
+ it = critical_native_code_with_clinit_check_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
delete data.allocator;
delete data.class_table;
@@ -3531,15 +3603,31 @@
return false;
}
-void ClassLinker::FixupStaticTrampolines(ObjPtr<mirror::Class> klass) {
+void ClassLinker::FixupStaticTrampolines(Thread* self, ObjPtr<mirror::Class> klass) {
ScopedAssertNoThreadSuspension sants(__FUNCTION__);
DCHECK(klass->IsVisiblyInitialized()) << klass->PrettyDescriptor();
- if (klass->NumDirectMethods() == 0) {
+ size_t num_direct_methods = klass->NumDirectMethods();
+ if (num_direct_methods == 0) {
return; // No direct methods => no static methods.
}
if (UNLIKELY(klass->IsProxyClass())) {
return;
}
+ PointerSize pointer_size = image_pointer_size_;
+ if (std::any_of(klass->GetDirectMethods(pointer_size).begin(),
+ klass->GetDirectMethods(pointer_size).end(),
+ [](const ArtMethod& m) { return m.IsCriticalNative(); })) {
+ // Store registered @CriticalNative methods, if any, to JNI entrypoints.
+ // Direct methods are a contiguous chunk of memory, so use the ordering of the map.
+ ArtMethod* first_method = klass->GetDirectMethod(0u, pointer_size);
+ ArtMethod* last_method = klass->GetDirectMethod(num_direct_methods - 1u, pointer_size);
+ MutexLock lock(self, critical_native_code_with_clinit_check_lock_);
+ auto lb = critical_native_code_with_clinit_check_.lower_bound(first_method);
+ while (lb != critical_native_code_with_clinit_check_.end() && lb->first <= last_method) {
+ lb->first->SetEntryPointFromJni(lb->second);
+ lb = critical_native_code_with_clinit_check_.erase(lb);
+ }
+ }
Runtime* runtime = Runtime::Current();
if (!runtime->IsStarted()) {
if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) {
@@ -3548,18 +3636,13 @@
}
const DexFile& dex_file = klass->GetDexFile();
- const uint16_t class_def_idx = klass->GetDexClassDefIndex();
- CHECK_NE(class_def_idx, DexFile::kDexNoIndex16);
- ClassAccessor accessor(dex_file, class_def_idx);
- // There should always be class data if there were direct methods.
- CHECK(accessor.HasClassData()) << klass->PrettyDescriptor();
bool has_oat_class;
OatFile::OatClass oat_class = OatFile::FindOatClass(dex_file,
klass->GetDexClassDefIndex(),
&has_oat_class);
// Link the code of methods skipped by LinkCode.
- for (size_t method_index = 0; method_index < accessor.NumDirectMethods(); ++method_index) {
- ArtMethod* method = klass->GetDirectMethod(method_index, image_pointer_size_);
+ for (size_t method_index = 0; method_index < num_direct_methods; ++method_index) {
+ ArtMethod* method = klass->GetDirectMethod(method_index, pointer_size);
if (!method->IsStatic()) {
// Only update static methods.
continue;
@@ -3664,8 +3747,10 @@
}
if (method->IsNative()) {
- // Unregistering restores the dlsym lookup stub.
- method->UnregisterNative();
+ // Set up the dlsym lookup stub. Do not go through `UnregisterNative()`
+ // as the extra processing for @CriticalNative is not needed yet.
+ method->SetEntryPointFromJni(
+ method->IsCriticalNative() ? GetJniDlsymLookupCriticalStub() : GetJniDlsymLookupStub());
if (enter_interpreter || quick_code == nullptr) {
// We have a native method here without code. Then it should have the generic JNI
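Taken together, the ClassLinker changes above implement the following flow (an informal sketch using the names from the code above, not additional patch content):
    // RegisterNative(self, m, code):
    //   if m is @CriticalNative and m's class is not yet visibly initialized:
    //     critical_native_code_with_clinit_check_[m] = code;   // defer the entrypoint
    //   else:
    //     m->SetEntryPointFromJni(code);
    // FixupStaticTrampolines(self, klass):   // runs once klass becomes visibly initialized
    //   flush any deferred entries for klass's direct methods into their ArtMethods;
    // UnregisterNative(self, m):
    //   drop any deferred entry and restore the dlsym lookup (critical) stub.
Deferring the entrypoint is what prevents a direct call from compiled code from skipping the class initialization check.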
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 4731203..33cd2f9 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -18,6 +18,7 @@
#define ART_RUNTIME_CLASS_LINKER_H_
#include <list>
+#include <map>
#include <set>
#include <string>
#include <type_traits>
@@ -41,6 +42,27 @@
namespace art {
+class ArtField;
+class ArtMethod;
+class ClassHierarchyAnalysis;
+enum class ClassRoot : uint32_t;
+class ClassTable;
+class DexFile;
+template<class T> class Handle;
+class ImtConflictTable;
+template<typename T> class LengthPrefixedArray;
+template<class T> class MutableHandle;
+class InternTable;
+class LinearAlloc;
+class OatFile;
+template<class T> class ObjectLock;
+class Runtime;
+class ScopedObjectAccessAlreadyRunnable;
+template<size_t kNumReferences> class PACKED(4) StackHandleScope;
+class Thread;
+
+enum VisitRootFlags : uint8_t;
+
namespace dex {
struct ClassDef;
struct MethodHandleItem;
@@ -75,27 +97,6 @@
using MethodDexCacheType = std::atomic<MethodDexCachePair>;
} // namespace mirror
-class ArtField;
-class ArtMethod;
-class ClassHierarchyAnalysis;
-enum class ClassRoot : uint32_t;
-class ClassTable;
-class DexFile;
-template<class T> class Handle;
-class ImtConflictTable;
-template<typename T> class LengthPrefixedArray;
-template<class T> class MutableHandle;
-class InternTable;
-class LinearAlloc;
-class OatFile;
-template<class T> class ObjectLock;
-class Runtime;
-class ScopedObjectAccessAlreadyRunnable;
-template<size_t kNumReferences> class PACKED(4) StackHandleScope;
-class Thread;
-
-enum VisitRootFlags : uint8_t;
-
class ClassVisitor {
public:
virtual ~ClassVisitor() {}
@@ -780,6 +781,19 @@
void MakeInitializedClassesVisiblyInitialized(Thread* self, bool wait);
+ // Registers the native method and returns the new entry point. NB The returned entry point
+ // might be different from the native_method argument if some MethodCallback modifies it.
+ const void* RegisterNative(Thread* self, ArtMethod* method, const void* native_method)
+ REQUIRES_SHARED(Locks::mutator_lock_) WARN_UNUSED;
+
+ // Unregister native code for a method.
+ void UnregisterNative(Thread* self, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Get the registered native method entrypoint, if any, otherwise null.
+ const void* GetRegisteredNative(Thread* self, ArtMethod* method)
+ REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(!critical_native_code_with_clinit_check_lock_);
+
struct DexCacheData {
// Construct an invalid data object.
DexCacheData()
@@ -956,7 +970,8 @@
ArtMethod* dst)
REQUIRES_SHARED(Locks::mutator_lock_);
- void FixupStaticTrampolines(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+ void FixupStaticTrampolines(Thread* self, ObjPtr<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Finds a class in a Path- or DexClassLoader, loading it if necessary without using JNI. Hash
// function is supposed to be ComputeModifiedUtf8Hash(descriptor). Returns true if the
@@ -1443,6 +1458,13 @@
IntrusiveForwardList<VisiblyInitializedCallback> running_visibly_initialized_callbacks_
GUARDED_BY(visibly_initialized_callback_lock_);
+ // Registered native code for @CriticalNative methods of classes that are not visibly
+ // initialized. These code pointers cannot be stored in ArtMethod as that would risk
+ // skipping the class initialization check for direct calls from compiled code.
+ Mutex critical_native_code_with_clinit_check_lock_;
+ std::map<ArtMethod*, void*> critical_native_code_with_clinit_check_
+ GUARDED_BY(critical_native_code_with_clinit_check_lock_);
+
std::unique_ptr<ClassHierarchyAnalysis> cha_;
class FindVirtualMethodHolderVisitor;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 849a967..ef0c474 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -23,16 +23,19 @@
#include "base/sdk_version.h"
#include "class_linker-inl.h"
#include "dex/dex_file-inl.h"
+#include "dex/method_reference.h"
#include "entrypoints/entrypoint_utils-inl.h"
#include "entrypoints/quick/callee_save_frame.h"
#include "entrypoints/runtime_asm_entrypoints.h"
#include "gc/accounting/card_table-inl.h"
+#include "index_bss_mapping.h"
#include "jni/java_vm_ext.h"
#include "mirror/class-inl.h"
#include "mirror/method.h"
#include "mirror/object-inl.h"
#include "mirror/object_array-inl.h"
#include "nth_caller_visitor.h"
+#include "oat_file.h"
#include "oat_quick_method_header.h"
#include "reflection.h"
#include "scoped_thread_state_change-inl.h"
@@ -281,4 +284,28 @@
return method_type;
}
+void MaybeUpdateBssMethodEntry(ArtMethod* callee, MethodReference callee_reference) {
+ DCHECK(callee != nullptr);
+ if (callee_reference.dex_file->GetOatDexFile() != nullptr) {
+ size_t bss_offset = IndexBssMappingLookup::GetBssOffset(
+ callee_reference.dex_file->GetOatDexFile()->GetMethodBssMapping(),
+ callee_reference.index,
+ callee_reference.dex_file->NumMethodIds(),
+ static_cast<size_t>(kRuntimePointerSize));
+ if (bss_offset != IndexBssMappingLookup::npos) {
+ DCHECK_ALIGNED(bss_offset, static_cast<size_t>(kRuntimePointerSize));
+ const OatFile* oat_file = callee_reference.dex_file->GetOatDexFile()->GetOatFile();
+ ArtMethod** method_entry = reinterpret_cast<ArtMethod**>(const_cast<uint8_t*>(
+ oat_file->BssBegin() + bss_offset));
+ DCHECK_GE(method_entry, oat_file->GetBssMethods().data());
+ DCHECK_LT(method_entry,
+ oat_file->GetBssMethods().data() + oat_file->GetBssMethods().size());
+ std::atomic<ArtMethod*>* atomic_entry =
+ reinterpret_cast<std::atomic<ArtMethod*>*>(method_entry);
+ static_assert(sizeof(*method_entry) == sizeof(*atomic_entry), "Size check.");
+ atomic_entry->store(callee, std::memory_order_release);
+ }
+ }
+}
+
} // namespace art
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 85082d3..dfc1edd 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -44,6 +44,7 @@
class ArtMethod;
class HandleScope;
enum InvokeType : uint32_t;
+class MethodReference;
class OatQuickMethodHeader;
class ScopedObjectAccessAlreadyRunnable;
class Thread;
@@ -218,6 +219,10 @@
inline HandleScope* GetGenericJniHandleScope(ArtMethod** managed_sp,
size_t num_handle_scope_references);
+// Update the method's .bss entry if the `callee_reference` has an associated oat file
+// and that oat file has a .bss entry for the `callee_reference`.
+void MaybeUpdateBssMethodEntry(ArtMethod* callee, MethodReference callee_reference);
+
} // namespace art
#endif // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
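
Note: MaybeUpdateBssMethodEntry() publishes the resolved ArtMethod* into a pre-allocated
.bss slot with a release store so that compiled code can pick it up later on its fast path.
A self-contained sketch of that publication pattern, assuming a toy slot array in place of
the oat file's .bss region:

// Standalone sketch of the .bss publication pattern (not ART code).
#include <atomic>
#include <cassert>
#include <cstddef>

struct FakeMethod { int id; };

constexpr size_t kNumSlots = 4;
std::atomic<FakeMethod*> bss_slots[kNumSlots];  // Zero-initialized, like .bss.

void PublishResolvedMethod(size_t slot, FakeMethod* resolved) {
  // Release order: readers that observe the pointer also observe the fully written method.
  bss_slots[slot].store(resolved, std::memory_order_release);
}

FakeMethod* TryFastPath(size_t slot) {
  // Pairs with the release store; nullptr means "not resolved yet, take the slow path".
  return bss_slots[slot].load(std::memory_order_acquire);
}

int main() {
  static FakeMethod callee{42};
  assert(TryFastPath(1) == nullptr);
  PublishResolvedMethod(1, &callee);
  assert(TryFastPath(1) == &callee);
  return 0;
}
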
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index f1e5772..98192c2 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -22,31 +22,99 @@
#include "arch/x86/jni_frame_x86.h"
#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
-#include "entrypoints/entrypoint_utils.h"
+#include "dex/dex_instruction-inl.h"
+#include "dex/method_reference.h"
+#include "entrypoints/entrypoint_utils-inl.h"
#include "jni/java_vm_ext.h"
#include "mirror/object-inl.h"
+#include "oat_quick_method_header.h"
#include "scoped_thread_state_change-inl.h"
+#include "stack_map.h"
#include "thread.h"
namespace art {
+static inline uint32_t GetInvokeStaticMethodIndex(ArtMethod* caller, uint32_t dex_pc)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ // Get the DexFile and method index.
+ const Instruction& instruction = caller->DexInstructions().InstructionAt(dex_pc);
+ DCHECK(instruction.Opcode() == Instruction::INVOKE_STATIC ||
+ instruction.Opcode() == Instruction::INVOKE_STATIC_RANGE);
+ uint32_t method_idx = (instruction.Opcode() == Instruction::INVOKE_STATIC)
+ ? instruction.VRegB_35c()
+ : instruction.VRegB_3rc();
+ return method_idx;
+}
+
// Used by the JNI dlsym stub to find the native method to invoke if none is registered.
extern "C" const void* artFindNativeMethodRunnable(Thread* self)
REQUIRES_SHARED(Locks::mutator_lock_) {
Locks::mutator_lock_->AssertSharedHeld(self); // We come here as Runnable.
- ArtMethod* method = self->GetCurrentMethod(nullptr);
+ uint32_t dex_pc;
+ ArtMethod* method = self->GetCurrentMethod(&dex_pc);
DCHECK(method != nullptr);
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+
+ if (!method->IsNative()) {
+ // We're coming from compiled managed code and the `method` we see here is the caller.
+ // Resolve target @CriticalNative method for a direct call from compiled managed code.
+ uint32_t method_idx = GetInvokeStaticMethodIndex(method, dex_pc);
+ ArtMethod* target_method = class_linker->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
+ self, method_idx, method, kStatic);
+ if (target_method == nullptr) {
+ self->AssertPendingException();
+ return nullptr;
+ }
+ DCHECK(target_method->IsCriticalNative());
+ MaybeUpdateBssMethodEntry(target_method, MethodReference(method->GetDexFile(), method_idx));
+
+ // These calls do not have an explicit class initialization check, so do the check now.
+ // (When going through the stub or GenericJNI, the check was already done.)
+ DCHECK(NeedsClinitCheckBeforeCall(target_method));
+ ObjPtr<mirror::Class> declaring_class = target_method->GetDeclaringClass();
+ if (UNLIKELY(!declaring_class->IsVisiblyInitialized())) {
+ StackHandleScope<1> hs(self);
+ Handle<mirror::Class> h_class(hs.NewHandle(declaring_class));
+ if (!class_linker->EnsureInitialized(self, h_class, true, true)) {
+ DCHECK(self->IsExceptionPending()) << method->PrettyMethod();
+ return nullptr;
+ }
+ }
+
+ // Replace the runtime method on the stack with the target method.
+ DCHECK(!self->GetManagedStack()->GetTopQuickFrameTag());
+ ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrameKnownNotTagged();
+ DCHECK(*sp == Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
+ *sp = target_method;
+ self->SetTopOfStackTagged(sp); // Fake GenericJNI frame.
+
+ // Continue with the target method.
+ method = target_method;
+ }
+ DCHECK(method == self->GetCurrentMethod(/*dex_pc=*/ nullptr));
+
+ // Check whether we already have registered native code for this method.
+ // For @CriticalNative it may not be stored in the ArtMethod as a JNI entrypoint if the class
+ // was not visibly initialized yet. For consistency, do this check also for @FastNative and
+ // normal native methods, though a hit there would mean another thread raced us to register.
+ const void* native_code = class_linker->GetRegisteredNative(self, method);
+ if (native_code != nullptr) {
+ return native_code;
+ }
// Lookup symbol address for method, on failure we'll return null with an exception set,
// otherwise we return the address of the method we found.
JavaVMExt* vm = down_cast<JNIEnvExt*>(self->GetJniEnv())->GetVm();
- void* native_code = vm->FindCodeForNativeMethod(method);
+ native_code = vm->FindCodeForNativeMethod(method);
if (native_code == nullptr) {
self->AssertPendingException();
return nullptr;
}
- // Register so that future calls don't come here
- return method->RegisterNative(native_code);
+
+ // Register the code. This usually prevents future calls from reaching this function again.
+ // We can still come here if the ClassLinker cannot set the entrypoint in the ArtMethod,
+ // i.e. for @CriticalNative methods whose declaring class is not yet visibly initialized.
+ return class_linker->RegisterNative(self, method, native_code);
}
// Used by the JNI dlsym stub to find the native method to invoke if none is registered.
@@ -57,23 +125,65 @@
return artFindNativeMethodRunnable(self);
}
-extern "C" size_t artCriticalNativeOutArgsSize(ArtMethod* method)
+extern "C" size_t artCriticalNativeFrameSize(ArtMethod* method, uintptr_t caller_pc)
REQUIRES_SHARED(Locks::mutator_lock_) {
- uint32_t shorty_len;
- const char* shorty = method->GetShorty(&shorty_len);
- switch (kRuntimeISA) {
- case InstructionSet::kArm:
- case InstructionSet::kThumb2:
- return arm::GetCriticalNativeOutArgsSize(shorty, shorty_len);
- case InstructionSet::kArm64:
- return arm64::GetCriticalNativeOutArgsSize(shorty, shorty_len);
- case InstructionSet::kX86:
- return x86::GetCriticalNativeOutArgsSize(shorty, shorty_len);
- case InstructionSet::kX86_64:
- return x86_64::GetCriticalNativeOutArgsSize(shorty, shorty_len);
- default:
- UNIMPLEMENTED(FATAL) << kRuntimeISA;
- UNREACHABLE();
+ if (method->IsNative()) {
+ // Get the method's shorty.
+ DCHECK(method->IsCriticalNative());
+ uint32_t shorty_len;
+ const char* shorty = method->GetShorty(&shorty_len);
+
+ // Return the platform-dependent stub frame size.
+ switch (kRuntimeISA) {
+ case InstructionSet::kArm:
+ case InstructionSet::kThumb2:
+ return arm::GetCriticalNativeStubFrameSize(shorty, shorty_len);
+ case InstructionSet::kArm64:
+ return arm64::GetCriticalNativeStubFrameSize(shorty, shorty_len);
+ case InstructionSet::kX86:
+ return x86::GetCriticalNativeStubFrameSize(shorty, shorty_len);
+ case InstructionSet::kX86_64:
+ return x86_64::GetCriticalNativeStubFrameSize(shorty, shorty_len);
+ default:
+ UNIMPLEMENTED(FATAL) << kRuntimeISA;
+ UNREACHABLE();
+ }
+ } else {
+ // We're coming from compiled managed code and the `method` we see here is the compiled
+ // method that made the call. Get the actual caller (may be inlined) and dex pc.
+ const OatQuickMethodHeader* current_code = method->GetOatQuickMethodHeader(caller_pc);
+ DCHECK(current_code != nullptr);
+ DCHECK(current_code->IsOptimized());
+ uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+ CodeInfo code_info = CodeInfo::DecodeInlineInfoOnly(current_code);
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+ DCHECK(stack_map.IsValid());
+ BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
+ ArtMethod* caller =
+ inline_infos.empty() ? method : GetResolvedMethod(method, code_info, inline_infos);
+ uint32_t dex_pc = inline_infos.empty() ? stack_map.GetDexPc() : inline_infos.back().GetDexPc();
+
+ // Get the callee shorty.
+ const DexFile* dex_file = method->GetDexFile();
+ uint32_t method_idx = GetInvokeStaticMethodIndex(caller, dex_pc);
+ uint32_t shorty_len;
+ const char* shorty = dex_file->GetMethodShorty(dex_file->GetMethodId(method_idx), &shorty_len);
+
+ // Return the platform-dependent direct call frame size.
+ switch (kRuntimeISA) {
+ case InstructionSet::kArm:
+ case InstructionSet::kThumb2:
+ return arm::GetCriticalNativeDirectCallFrameSize(shorty, shorty_len);
+ case InstructionSet::kArm64:
+ return arm64::GetCriticalNativeDirectCallFrameSize(shorty, shorty_len);
+ case InstructionSet::kX86:
+ return x86::GetCriticalNativeDirectCallFrameSize(shorty, shorty_len);
+ case InstructionSet::kX86_64:
+ return x86_64::GetCriticalNativeDirectCallFrameSize(shorty, shorty_len);
+ default:
+ UNIMPLEMENTED(FATAL) << kRuntimeISA;
+ UNREACHABLE();
+ }
}
}
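
Note: artFindNativeMethodRunnable() now distinguishes two callers. Behind a @CriticalNative
direct-call site the current method is the compiled managed caller, so the callee must first
be resolved from the invoke-static at the caller's dex pc and its class initialized; otherwise
the current method is the native method itself. A toy model of that branch (every type and
helper below is hypothetical, not ART code):

struct FakeMethod {
  bool is_native = false;
  bool is_critical_native = false;
  bool class_visibly_initialized = false;
  const void* registered_code = nullptr;  // Code parked by the class linker, if any.
  const void* dlsym_code = nullptr;       // What a JavaVM symbol lookup would find.
  FakeMethod* critical_callee = nullptr;  // Pretend resolution result for a direct-call site.
};

const void* FindNativeCode(FakeMethod* current) {
  FakeMethod* method = current;
  if (!method->is_native) {
    // Direct-call path: `current` is the compiled managed caller; "resolve" the callee and
    // make sure its class is initialized before any cached code pointer may be used.
    FakeMethod* target = method->critical_callee;
    if (!target->class_visibly_initialized) {
      target->class_visibly_initialized = true;  // Stands in for EnsureInitialized().
    }
    method = target;  // ART also rewrites the top quick frame to the target here.
  }
  if (method->registered_code != nullptr) {
    return method->registered_code;  // Parked by the class linker before the class was visible.
  }
  return method->dlsym_code;  // nullptr would mean UnsatisfiedLinkError in the real runtime.
}

int main() {
  static int code;
  FakeMethod callee;
  callee.is_native = true;
  callee.is_critical_native = true;
  callee.dlsym_code = &code;

  FakeMethod caller;               // Compiled managed caller of a @CriticalNative method.
  caller.critical_callee = &callee;
  return FindNativeCode(&caller) == &code ? 0 : 1;
}
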
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 8508086..77a9cfa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -32,7 +32,6 @@
#include "gc/accounting/card_table-inl.h"
#include "imt_conflict_table.h"
#include "imtable-inl.h"
-#include "index_bss_mapping.h"
#include "instrumentation.h"
#include "interpreter/interpreter.h"
#include "interpreter/interpreter_common.h"
@@ -1319,26 +1318,9 @@
called = linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
self, called_method.index, caller, invoke_type);
- // Update .bss entry in oat file if any.
- if (called != nullptr && called_method.dex_file->GetOatDexFile() != nullptr) {
- size_t bss_offset = IndexBssMappingLookup::GetBssOffset(
- called_method.dex_file->GetOatDexFile()->GetMethodBssMapping(),
- called_method.index,
- called_method.dex_file->NumMethodIds(),
- static_cast<size_t>(kRuntimePointerSize));
- if (bss_offset != IndexBssMappingLookup::npos) {
- DCHECK_ALIGNED(bss_offset, static_cast<size_t>(kRuntimePointerSize));
- const OatFile* oat_file = called_method.dex_file->GetOatDexFile()->GetOatFile();
- ArtMethod** method_entry = reinterpret_cast<ArtMethod**>(const_cast<uint8_t*>(
- oat_file->BssBegin() + bss_offset));
- DCHECK_GE(method_entry, oat_file->GetBssMethods().data());
- DCHECK_LT(method_entry,
- oat_file->GetBssMethods().data() + oat_file->GetBssMethods().size());
- std::atomic<ArtMethod*>* atomic_entry =
- reinterpret_cast<std::atomic<ArtMethod*>*>(method_entry);
- static_assert(sizeof(*method_entry) == sizeof(*atomic_entry), "Size check.");
- atomic_entry->store(called, std::memory_order_release);
- }
+ // If successful, update .bss entry in oat file if any.
+ if (called != nullptr) {
+ MaybeUpdateBssMethodEntry(called, called_method);
}
}
const void* code = nullptr;
diff --git a/runtime/interpreter/cfi_asm_support.h b/runtime/interpreter/cfi_asm_support.h
index 04812e1..713bcf8 100644
--- a/runtime/interpreter/cfi_asm_support.h
+++ b/runtime/interpreter/cfi_asm_support.h
@@ -50,10 +50,28 @@
0x92 /* bregx */, reg, (offset & 0x7F), \
0x06 /* DW_OP_DEREF */, \
0x23 /* DW_OP_plus_uconst */, size
+
+ #define CFI_EXPRESSION_BREG_1(n, b, offset) .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ n, /* rule for register n */ \
+ 2, /* expression length */ \
+ 0x70+b, /* DW_OP_BREG<b>() */ \
+ (offset) & 0x7f /* SLEB128 offset */
+
+ #define CFI_EXPRESSION_BREG_2(n, b, offset) .cfi_escape \
+ 0x10, /* DW_CFA_expression */ \
+ n, /* rule for register n */ \
+ 3, /* expression length */ \
+ 0x70+b, /* DW_OP_BREG<b>() */ \
+ ((offset) & 0x7f) | 0x80, /* SLEB128 offset, byte 1 */ \
+ ((offset) >> 7) & 0x7f /* SLEB128 offset, byte 2 */
+
#else
// Mac OS doesn't like cfi_* directives.
#define CFI_DEFINE_DEX_PC_WITH_OFFSET(tmpReg, dexReg, dexOffset)
#define CFI_DEFINE_CFA_DEREF(reg, offset)
+ #define CFI_EXPRESSION_BREG_1(n, b, offset)
+ #define CFI_EXPRESSION_BREG_2(n, b, offset)
#endif
#endif // ART_RUNTIME_INTERPRETER_CFI_ASM_SUPPORT_H_
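
Note: CFI_EXPRESSION_BREG_2 hand-encodes the offset as a two-byte SLEB128, which is only
valid while the offset stays below 0x2000 (so the sign bit of the last byte remains clear).
A standalone check of the byte math, assuming a generic SLEB128 encoder as the reference
(not ART code):

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint8_t> EncodeSleb128(int32_t value) {
  std::vector<uint8_t> out;
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;  // Arithmetic shift keeps the sign.
    more = !((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40) != 0));
    if (more) byte |= 0x80;
    out.push_back(byte);
  }
  return out;
}

int main() {
  // The macro's bytes: ((offset) & 0x7f) | 0x80, ((offset) >> 7) & 0x7f.
  for (int32_t offset = 0x80; offset < 0x2000; ++offset) {
    uint8_t b0 = (offset & 0x7f) | 0x80;
    uint8_t b1 = (offset >> 7) & 0x7f;
    std::vector<uint8_t> ref = EncodeSleb128(offset);
    assert(ref.size() == 2 && ref[0] == b0 && ref[1] == b1);
  }
  return 0;
}
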
diff --git a/runtime/jni/jni_internal.cc b/runtime/jni/jni_internal.cc
index c178c38..8e69157 100644
--- a/runtime/jni/jni_internal.cc
+++ b/runtime/jni/jni_internal.cc
@@ -2304,6 +2304,7 @@
return JNI_ERR; // Not reached except in unit tests.
}
CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", java_class, JNI_ERR);
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
ScopedObjectAccess soa(env);
StackHandleScope<1> hs(soa.Self());
Handle<mirror::Class> c = hs.NewHandle(soa.Decode<mirror::Class>(java_class));
@@ -2420,7 +2421,7 @@
// TODO: make this a hard register error in the future.
}
- const void* final_function_ptr = m->RegisterNative(fnPtr);
+ const void* final_function_ptr = class_linker->RegisterNative(soa.Self(), m, fnPtr);
UNUSED(final_function_ptr);
}
return JNI_OK;
@@ -2434,10 +2435,11 @@
VLOG(jni) << "[Unregistering JNI native methods for " << mirror::Class::PrettyClass(c) << "]";
size_t unregistered_count = 0;
- auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ auto pointer_size = class_linker->GetImagePointerSize();
for (auto& m : c->GetMethods(pointer_size)) {
if (m.IsNative()) {
- m.UnregisterNative();
+ class_linker->UnregisterNative(soa.Self(), &m);
unregistered_count++;
}
}
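
Note: RegisterNatives()/UnregisterNatives() now route through the ClassLinker internally,
but the JNI surface is unchanged. For reference, an ordinary NDK-style registration of a
@CriticalNative method (class and method names are hypothetical):

#include <jni.h>

// Matches a Java declaration like:
//   @CriticalNative private static native int add(int a, int b);
// in a hypothetical class com.example.Natives.
static jint Natives_add(jint a, jint b) {  // No JNIEnv*/jclass parameters for @CriticalNative.
  return a + b;
}

JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* /*reserved*/) {
  JNIEnv* env = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
    return JNI_ERR;
  }
  jclass klass = env->FindClass("com/example/Natives");
  if (klass == nullptr) {
    return JNI_ERR;
  }
  const JNINativeMethod methods[] = {
      {"add", "(II)I", reinterpret_cast<void*>(Natives_add)},
  };
  if (env->RegisterNatives(klass, methods, 1) != JNI_OK) {
    return JNI_ERR;
  }
  return JNI_VERSION_1_6;
}
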
diff --git a/runtime/oat.h b/runtime/oat.h
index d37927d..44b61a7 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Change ClassStatus bits with kVerifiedNeedsAccessChecks.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '8', '3', '\0' } };
+ // Last oat version changed reason: Direct calls to @CriticalNative code.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '8', '4', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ad4d7a7..1c1159a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -2307,8 +2307,10 @@
if (IsAotCompiler()) {
PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
+ method->SetEntryPointFromJniPtrSize(nullptr, pointer_size);
} else {
method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
+ method->SetEntryPointFromJni(GetJniDlsymLookupCriticalStub());
}
return method;
}
diff --git a/tools/cpp-define-generator/globals.def b/tools/cpp-define-generator/globals.def
index 6c9b2b0..6706fed 100644
--- a/tools/cpp-define-generator/globals.def
+++ b/tools/cpp-define-generator/globals.def
@@ -30,6 +30,10 @@
#include "stack.h"
#endif
+ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_NATIVE,
+ art::kAccNative)
+ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_NATIVE_BIT,
+ art::MostSignificantBit(art::kAccNative))
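
Note: the new defines expose the native access flag and its bit index to assembly stubs.
Assuming kAccNative keeps the standard ACC_NATIVE value 0x0100 (see modifiers.h), the
exported bit index is 8, which lets a stub test the flag with a single bit-test instead of
loading a mask. A quick standalone check with a simplified stand-in for
art::MostSignificantBit() (not ART's implementation):

#include <cstdint>

constexpr uint32_t kAccNative = 0x0100;  // Assumption: standard JVM ACC_NATIVE value.

constexpr int MostSignificantBit(uint32_t value) {  // Simplified stand-in, not art::MostSignificantBit.
  int bit = -1;
  while (value != 0) {
    ++bit;
    value >>= 1;
  }
  return bit;
}

int main() {
  static_assert(MostSignificantBit(kAccNative) == 8, "kAccNative is a single bit at index 8");
  return 0;
}
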
ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE,
art::kAccFastNative)
ASM_DEFINE(ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE,