Direct calls to @CriticalNative methods.
Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.
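
For context, a minimal sketch of the kind of method this change targets (the class,
method, and function names below are hypothetical, not part of this change): a static
native method annotated with @CriticalNative receives no JNIEnv* or jclass on the
native side, so the compiled managed caller can branch straight to the function
registered via RegisterNatives() (loaded from ArtMethod's JNI entrypoint, as in the
code below) instead of going through the generic JNI stub.

    // Java side (hypothetical):
    //   import dalvik.annotation.optimization.CriticalNative;
    //   class NativeOps {
    //     @CriticalNative
    //     static native int add(int a, int b);
    //   }
    //
    // Native side: with @CriticalNative the registered function takes only the
    // primitive arguments; there is no JNIEnv* and no jclass parameter.
    #include <jni.h>

    extern "C" jint NativeOps_add(jint a, jint b) {
      return a + b;
    }

    extern "C" jint JNI_OnLoad(JavaVM* vm, void* /* reserved */) {
      JNIEnv* env = nullptr;
      if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
        return JNI_ERR;
      }
      jclass klass = env->FindClass("NativeOps");
      JNINativeMethod methods[] = {
          { "add", "(II)I", reinterpret_cast<void*>(NativeOps_add) },
      };
      if (klass == nullptr || env->RegisterNatives(klass, methods, 1) != JNI_OK) {
        return JNI_ERR;
      }
      return JNI_VERSION_1_6;
    }
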
Golem results:
art-opt-cc                     x86     x86-64  arm     arm64
NativeDowncallStaticCritical   +12.5%  +62.5%  +75.9%  +41.7%
NativeDowncallStaticCritical6  +55.6%  +87.5%  +72.1%  +35.3%
art-opt                        x86     x86-64  arm     arm64
NativeDowncallStaticCritical   +28.6%  +85.6%  +76.4%  +38.4%
NativeDowncallStaticCritical6  +44.6%  +44.6%  +74.6%  +32.2%
Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 2db1390..685e1e2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -314,6 +314,12 @@
}
ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig;
+ // Make sure the test class is visibly initialized so that the RegisterNatives() below
+ // sets the JNI entrypoint rather than leaving it as null (this test pretends to be an
+ // AOT compiler and therefore the ClassLinker skips entrypoint initialization). Even
+ // if the ClassLinker initialized it with a stub, we would not want to test that here.
+ class_linker_->MakeInitializedClassesVisiblyInitialized(Thread::Current(), /*wait=*/ true);
+
if (native_fnptr != nullptr) {
JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } };
ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1))
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index d07ab98..7afa8b1 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -420,7 +420,7 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t ArmJniCallingConvention::OutArgSize() const {
+size_t ArmJniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
// Account for arguments passed through r0-r3. (No FP args, AAPCS32 is soft-float.)
@@ -440,7 +440,7 @@
}
size_t out_args_size = RoundUp(size, kAapcsStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -512,9 +512,9 @@
CHECK_GE(itr_slots_, kJniArgumentRegisterCount);
size_t offset =
displacement_.Int32Value()
- - OutArgSize()
+ - OutFrameSize()
+ ((itr_slots_ - kJniArgumentRegisterCount) * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -537,7 +537,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool ArmJniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 7896d64..38f7184 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -65,7 +65,7 @@
// JNI calling convention
void Next() override; // Override default behavior for AAPCS
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 32da141..06796c1 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -265,20 +265,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t Arm64JniCallingConvention::OutArgSize() const {
+size_t Arm64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through v0-v7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64.
static_assert((kCoreCalleeSpillMask & ~kAapcs64CoreCalleeSpillMask) == 0u);
@@ -291,7 +285,7 @@
}
size_t out_args_size = RoundUp(size, kAapcs64StackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -355,8 +349,8 @@
static_cast<size_t>(itr_float_and_doubles_))
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -378,7 +372,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool Arm64JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm64
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 7beca08..d381d9d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index b4396f0..005ae91 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -303,9 +303,9 @@
// always at the bottom of a frame, but this doesn't work for outgoing
// native args). Includes alignment.
virtual size_t FrameSize() const = 0;
- // Size of outgoing arguments (stack portion), including alignment.
+ // Size of outgoing frame, i.e. stack arguments, @CriticalNative return PC if needed, alignment.
// -- Arguments that are passed via registers are excluded from this size.
- virtual size_t OutArgSize() const = 0;
+ virtual size_t OutFrameSize() const = 0;
// Number of references in stack indirect reference table
size_t ReferenceCount() const;
// Location where the segment state of the local indirect reference table is saved
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 036cdbb..913a3ba 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -220,7 +220,7 @@
// 1. Build the frame saving all callee saves, Method*, and PC return address.
// For @CriticalNative, this includes space for out args, otherwise just the managed frame.
const size_t managed_frame_size = main_jni_conv->FrameSize();
- const size_t main_out_arg_size = main_jni_conv->OutArgSize();
+ const size_t main_out_arg_size = main_jni_conv->OutFrameSize();
size_t current_frame_size = is_critical_native ? main_out_arg_size : managed_frame_size;
ManagedRegister method_register =
is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
@@ -582,7 +582,7 @@
if (LIKELY(!is_critical_native)) {
// Increase frame size for out args if needed by the end_jni_conv.
- const size_t end_out_arg_size = end_jni_conv->OutArgSize();
+ const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
if (end_out_arg_size > current_out_arg_size) {
size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
current_out_arg_size = end_out_arg_size;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 6776f12..df45627 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -220,11 +220,10 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86JniCallingConvention::OutArgSize() const {
- // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
- size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
- // The size of outgoiong arguments.
- size_t size = all_args * kFramePointerSize;
+size_t X86JniCallingConvention::OutFrameSize() const {
+ // The size of outgoing arguments.
+ size_t size = GetNativeOutArgsSize(/*num_args=*/ NumberOfExtraArgumentsForJni() + NumArgs(),
+ NumLongOrDoubleArgs());
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS.
static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
@@ -244,14 +243,16 @@
if (return_type_ok && size == kFramePointerSize) {
// Note: This is not aligned to kNativeStackAlignment but that's OK for tail call.
static_assert(kFramePointerSize < kNativeStackAlignment);
- DCHECK_EQ(kFramePointerSize, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ // The stub frame size is considered 0 in the callee where the return PC is a part of
+ // the callee frame but it is kPointerSize in the compiled stub before the tail call.
+ DCHECK_EQ(0u, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
return kFramePointerSize;
}
}
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -279,7 +280,8 @@
}
FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
- return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize));
+ return
+ FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize));
}
ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
@@ -295,7 +297,7 @@
bool X86JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == kFramePointerSize;
+ return OutFrameSize() == kFramePointerSize;
}
} // namespace x86
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 6f22c2b..81f617d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -61,7 +61,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index e97cab8..44ae8be 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -208,21 +208,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86_64JniCallingConvention::OutArgSize() const {
+size_t X86_64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through Xmm0..Xmm7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- static_assert(kFramePointerSize == kMmxSpillSize);
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
if (UNLIKELY(IsCriticalNative())) {
// We always need to spill xmm12-xmm15 as they are managed callee-saves
@@ -239,7 +232,7 @@
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -297,8 +290,8 @@
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
// Integer arguments passed through GPR
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index d043a3e..5bde766 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index cfd9ea6..f74a938 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -32,6 +32,7 @@
#include "code_generator_x86_64.h"
#endif
+#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "base/casts.h"
@@ -503,23 +504,69 @@
if (invoke->IsInvokeStaticOrDirect()) {
HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
- switch (call->GetMethodLoadKind()) {
- case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
- break;
- case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
- locations->AddTemp(visitor->GetMethodLocation());
- locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
- break;
- default:
- locations->AddTemp(visitor->GetMethodLocation());
- break;
+ HInvokeStaticOrDirect::MethodLoadKind method_load_kind = call->GetMethodLoadKind();
+ HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = call->GetCodePtrLocation();
+ if (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ locations->AddTemp(Location::RequiresRegister()); // For target method.
+ }
+ if (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative ||
+ method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kRecursive) {
+ // For `kCallCriticalNative` we need the current method as the hidden argument
+ // if we reach the dlsym lookup stub for @CriticalNative.
+ locations->SetInAt(call->GetCurrentMethodIndex(), visitor->GetMethodLocation());
+ } else {
+ locations->AddTemp(visitor->GetMethodLocation());
+ if (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall) {
+ locations->SetInAt(call->GetCurrentMethodIndex(), Location::RequiresRegister());
+ }
}
} else if (!invoke->IsInvokePolymorphic()) {
locations->AddTemp(visitor->GetMethodLocation());
}
}
+void CodeGenerator::PrepareCriticalNativeArgumentMoves(
+ HInvokeStaticOrDirect* invoke,
+ /*inout*/InvokeDexCallingConventionVisitor* visitor,
+ /*out*/HParallelMove* parallel_move) {
+ LocationSummary* locations = invoke->GetLocations();
+ for (size_t i = 0, num = invoke->GetNumberOfArguments(); i != num; ++i) {
+ Location in_location = locations->InAt(i);
+ DataType::Type type = invoke->InputAt(i)->GetType();
+ DCHECK_NE(type, DataType::Type::kReference);
+ Location out_location = visitor->GetNextLocation(type);
+ if (out_location.IsStackSlot() || out_location.IsDoubleStackSlot()) {
+ // Stack arguments will need to be moved after adjusting the SP.
+ parallel_move->AddMove(in_location, out_location, type, /*instruction=*/ nullptr);
+ } else {
+ // Register arguments should have been assigned their final locations for register allocation.
+ DCHECK(out_location.Equals(in_location)) << in_location << " -> " << out_location;
+ }
+ }
+}
+
+void CodeGenerator::AdjustCriticalNativeArgumentMoves(size_t out_frame_size,
+ /*inout*/HParallelMove* parallel_move) {
+ // Adjust the source stack offsets by `out_frame_size`, i.e. the additional
+ // frame size needed for outgoing stack arguments.
+ for (size_t i = 0, num = parallel_move->NumMoves(); i != num; ++i) {
+ MoveOperands* operands = parallel_move->MoveOperandsAt(i);
+ Location source = operands->GetSource();
+ if (operands->GetSource().IsStackSlot()) {
+ operands->SetSource(Location::StackSlot(source.GetStackIndex() + out_frame_size));
+ } else if (operands->GetSource().IsDoubleStackSlot()) {
+ operands->SetSource(Location::DoubleStackSlot(source.GetStackIndex() + out_frame_size));
+ }
+ }
+}
+
+const char* CodeGenerator::GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke,
+ uint32_t* shorty_len) {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(invoke->GetResolvedMethod()->IsCriticalNative());
+ return invoke->GetResolvedMethod()->GetShorty(shorty_len);
+}
+
void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
MoveConstant(temp, invoke->GetDexMethodIndex());
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ff2be47..4bfc14a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -570,6 +570,28 @@
static void CreateCommonInvokeLocationSummary(
HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
+ template <typename CriticalNativeCallingConventionVisitor,
+ size_t kNativeStackAlignment,
+ size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)>
+ static size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke,
+ /*out*/HParallelMove* parallel_move) {
+ DCHECK(!invoke->GetLocations()->Intrinsified());
+ CriticalNativeCallingConventionVisitor calling_convention_visitor(
+ /*for_register_allocation=*/ false);
+ PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, parallel_move);
+ size_t out_frame_size =
+ RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment);
+ if (kIsDebugBuild) {
+ uint32_t shorty_len;
+ const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
+ DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
+ }
+ if (out_frame_size != 0u) {
+ AdjustCriticalNativeArgumentMoves(out_frame_size, parallel_move);
+ }
+ return out_frame_size;
+ }
+
void GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
@@ -799,6 +821,16 @@
bool needs_vreg_info = true);
void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
+ static void PrepareCriticalNativeArgumentMoves(
+ HInvokeStaticOrDirect* invoke,
+ /*inout*/InvokeDexCallingConventionVisitor* visitor,
+ /*out*/HParallelMove* parallel_move);
+
+ static void AdjustCriticalNativeArgumentMoves(size_t out_frame_size,
+ /*inout*/HParallelMove* parallel_move);
+
+ static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);
+
OptimizingCompilerStats* stats_;
HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4a618de..d108623 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -18,6 +18,7 @@
#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/arm64/jni_frame_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
@@ -870,6 +871,49 @@
return LocationFrom(kArtMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFPRegistersLength) {
+ location = LocationFrom(kParameterFPRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register x0
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
+ // Pass the method in the hidden argument x15.
+ return Location::RegisterLocation(x15.GetCode());
+}
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -4295,7 +4339,13 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
@@ -4327,7 +4377,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -4373,6 +4423,19 @@
}
}
+ auto call_code_pointer_member = [&](MemberOffset offset) {
+ // LR = callee_method->member;
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ }
+ };
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
{
@@ -4384,20 +4447,50 @@
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, MemOperand(
- XRegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- // lr()
- __ blr(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
+ kAapcs64StackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ Claim(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ Ubfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kInt8:
+ __ Sbfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kUint16:
+ __ Ubfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt16:
+ __ Sbfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ Drop(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
break;
}
@@ -4819,14 +4912,9 @@
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- LocationSummary* locations = invoke->GetLocations();
- codegen_->GenerateStaticOrDirectCall(
- invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
- }
+ LocationSummary* locations = invoke->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 487d091..bebf43d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -231,6 +231,31 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
};
+class CriticalNativeCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorARM64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorARM64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorARM64);
+};
+
class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionARM64() {}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1d8fd6c..9916257 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -18,6 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm/jni_frame_arm.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
@@ -2435,6 +2436,54 @@
return LocationFrom(kMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ // Native ABI uses the same registers as managed, except that the method register r0
+ // is a normal argument.
+ Location location = Location::NoLocation();
+ if (DataType::Is64BitType(type)) {
+ gpr_index_ = RoundUp(gpr_index_, 2u);
+ stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
+ if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
+ location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
+ kParameterCoreRegistersVIXL[gpr_index_]);
+ gpr_index_ += 2u;
+ }
+ } else {
+ if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
+ location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ stack_offset_ += 2 * kFramePointerSize;
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ stack_offset_ += kFramePointerSize;
+ }
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
+ const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
+ // Pass the method in the hidden argument R4.
+ return Location::RegisterLocation(R4);
+}
+
void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
if (source.Equals(destination)) {
return;
@@ -3294,7 +3343,13 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -8856,35 +8911,35 @@
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ ArtMethod* method) {
+ if (desired_dispatch_info.code_ptr_location ==
+ HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ // TODO: Work around CheckTypeConsistency() in code_generator.cc that does not allow
+ // putting FP values in core registers as we need to do for the soft-float native ABI.
+ ScopedObjectAccess soa(Thread::Current());
+ uint32_t shorty_len;
+ const char* shorty = method->GetShorty(&shorty_len);
+ size_t reg = 0u;
+ for (uint32_t i = 1; i != shorty_len; ++i) {
+ size_t next_reg = reg + 1u;
+ if (shorty[i] == 'D' || shorty[i] == 'J') {
+ reg = RoundUp(reg, 2u);
+ next_reg = reg + 2u;
+ }
+ if (reg == 4u) {
+ break;
+ }
+ if (shorty[i] == 'D' || shorty[i] == 'F') {
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+ dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ return dispatch_info;
+ }
+ reg = next_reg;
+ }
+ }
return desired_dispatch_info;
}
-vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
- HInvokeStaticOrDirect* invoke, vixl32::Register temp) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
- Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
- if (!invoke->GetLocations()->Intrinsified()) {
- return RegisterFrom(location);
- }
- // For intrinsics we allow any location, so it may be on the stack.
- if (!location.IsRegister()) {
- GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex());
- return temp;
- }
- // For register locations, check if the register was saved. If so, get it from the stack.
- // Note: There is a chance that the register was saved but not overwritten, so we could
- // save one load. However, since this is just an intrinsic slow path we prefer this
- // simple and more robust approach rather that trying to determine if that's the case.
- SlowPathCode* slow_path = GetCurrentSlowPath();
- if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
- int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
- GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
- return temp;
- }
- return RegisterFrom(location);
-}
-
void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
@@ -8897,7 +8952,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -8932,6 +8987,20 @@
}
}
+ auto call_code_pointer_member = [&](MemberOffset offset) {
+ // LR = callee_method->member;
+ GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
+ {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+ // LR()
+ __ blx(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ }
+ };
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
{
@@ -8943,23 +9012,46 @@
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_
- GetAssembler()->LoadFromOffset(
- kLoadWord,
- lr,
- RegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
- ExactAssemblyScope aas(GetVIXLAssembler(),
- vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
- // LR()
- __ blx(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
+ kAapcsStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ Claim(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
+ // Move the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kFloat32:
+ __ Vmov(s0, r0);
+ break;
+ case DataType::Type::kFloat64:
+ __ Vmov(d0, r0, r1);
+ break;
+ case DataType::Type::kBool:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ Drop(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
break;
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3eed730..d6300c7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -187,6 +187,30 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARMVIXL);
};
+class CriticalNativeCallingConventionVisitorARMVIXL : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorARMVIXL(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorARMVIXL() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorARMVIXL);
+};
+
class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionARMVIXL() {}
@@ -853,9 +877,6 @@
uint32_t encoded_data,
/*out*/ std::string* debug_name);
- vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
- vixl::aarch32::Register temp);
-
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
VIXLUInt32Literal*,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e9ef21a..595b31e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
+#include "arch/x86/jni_frame_x86.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
@@ -1300,6 +1301,34 @@
return Location::NoLocation();
}
+Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location;
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ stack_offset_ += 2 * kFramePointerSize;
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ stack_offset_ += kFramePointerSize;
+ }
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorX86 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
+ // Pass the method in the hidden argument EAX.
+ return Location::RegisterLocation(EAX);
+}
+
void CodeGeneratorX86::Move32(Location destination, Location source) {
if (source.Equals(destination)) {
return;
@@ -1374,11 +1403,13 @@
size_t elem_size = DataType::Size(DataType::Type::kInt32);
// Create stack space for 2 elements.
__ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
__ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
__ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
__ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
} else {
LOG(FATAL) << "Unimplemented";
}
@@ -2286,9 +2317,15 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
- // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+ // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
if (invoke->HasPcRelativeMethodLoadKind()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
}
@@ -2989,6 +3026,7 @@
if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
adjustment = DataType::Size(DataType::Type::kInt64);
__ subl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(adjustment);
}
// Load the value to the FP stack, using temporaries if needed.
@@ -3005,6 +3043,7 @@
// Remove the temporary stack space we allocated.
if (adjustment != 0) {
__ addl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(-adjustment);
}
break;
}
@@ -3039,6 +3078,7 @@
if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
adjustment = DataType::Size(DataType::Type::kInt64);
__ subl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(adjustment);
}
// Load the value to the FP stack, using temporaries if needed.
@@ -3055,6 +3095,7 @@
// Remove the temporary stack space we allocated.
if (adjustment != 0) {
__ addl(ESP, Immediate(adjustment));
+ __ cfi().AdjustCFAOffset(-adjustment);
}
break;
}
@@ -3551,6 +3592,7 @@
// Create stack space for 2 elements.
// TODO: enhance register allocator to ask for stack temporaries.
__ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
// Load the values to the FP stack in reverse order, using temporaries if needed.
const bool is_wide = !is_float;
@@ -3591,6 +3633,7 @@
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
}
@@ -4934,7 +4977,6 @@
Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
Register temp) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
if (!invoke->GetLocations()->Intrinsified()) {
return location.AsRegister<Register>();
@@ -4970,7 +5012,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -5009,15 +5051,73 @@
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
__ call(GetFrameEntryLabel());
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ subl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+ // (callee_method + offset_of_jni_entry_point)()
+ __ call(Address(callee_method.AsRegister<Register>(),
+ ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
+ // Create space for conversion.
+ out_frame_size = 8u;
+ __ subl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ }
+ // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ movzxb(EAX, AL);
+ break;
+ case DataType::Type::kInt8:
+ __ movsxb(EAX, AL);
+ break;
+ case DataType::Type::kUint16:
+ __ movzxw(EAX, EAX);
+ break;
+ case DataType::Type::kInt16:
+ __ movsxw(EAX, EAX);
+ break;
+ case DataType::Type::kFloat32:
+ __ fstps(Address(ESP, 0));
+ __ movss(XMM0, Address(ESP, 0));
+ break;
+ case DataType::Type::kFloat64:
+ __ fstpl(Address(ESP, 0));
+ __ movsd(XMM0, Address(ESP, 0));
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ addl(ESP, Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<Register>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86PointerSize).Int32Value()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
}
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
DCHECK(!IsLeafMethod());
}
@@ -5072,7 +5172,6 @@
}
void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
boot_image_method_patches_.emplace_back(
@@ -5081,7 +5180,6 @@
}
void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
// Add the patch entry and bind its label at the end of the instruction.
@@ -5126,7 +5224,6 @@
uint32_t boot_image_reference,
HInvokeStaticOrDirect* invoke) {
if (GetCompilerOptions().IsBootImage()) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -5135,7 +5232,6 @@
__ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
} else if (GetCompilerOptions().GetCompilePic()) {
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -5160,7 +5256,6 @@
if (GetCompilerOptions().IsBootImage()) {
DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
// Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
- DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -6365,24 +6460,45 @@
__ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
}
} else if (source.IsRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
+ DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
+ __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+ } else if (destination.IsFpuRegister()) {
size_t elem_size = DataType::Size(DataType::Type::kInt32);
- // Create stack space for 2 elements.
- __ subl(ESP, Immediate(2 * elem_size));
- __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
- __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+ // Push the 2 source registers to stack.
+ __ pushl(source.AsRegisterPairHigh<Register>());
+ __ cfi().AdjustCFAOffset(elem_size);
+ __ pushl(source.AsRegisterPairLow<Register>());
+ __ cfi().AdjustCFAOffset(elem_size);
+ // Load the destination register.
__ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
// And remove the temporary stack space we allocated.
__ addl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(-(2 * elem_size));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
+ source.AsRegisterPairHigh<Register>());
+ }
} else if (source.IsFpuRegister()) {
if (destination.IsRegister()) {
__ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsFpuRegister()) {
__ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsRegisterPair()) {
- XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
- __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
- __ psrlq(src_reg, Immediate(32));
- __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
+ size_t elem_size = DataType::Size(DataType::Type::kInt32);
+ // Create stack space for 2 elements.
+ __ subl(ESP, Immediate(2 * elem_size));
+ __ cfi().AdjustCFAOffset(2 * elem_size);
+ // Store the source register.
+ __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
+ // And pop the values into destination registers.
+ __ popl(destination.AsRegisterPairLow<Register>());
+ __ cfi().AdjustCFAOffset(-elem_size);
+ __ popl(destination.AsRegisterPairHigh<Register>());
+ __ cfi().AdjustCFAOffset(-elem_size);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else if (destination.IsDoubleStackSlot()) {
@@ -6480,9 +6596,12 @@
__ xorpd(dest, dest);
} else {
__ pushl(high);
+ __ cfi().AdjustCFAOffset(4);
__ pushl(low);
+ __ cfi().AdjustCFAOffset(4);
__ movsd(dest, Address(ESP, 0));
__ addl(ESP, Immediate(8));
+ __ cfi().AdjustCFAOffset(-8);
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
@@ -6520,10 +6639,12 @@
void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
size_t extra_slot = 4 * kX86WordSize;
__ subl(ESP, Immediate(extra_slot));
+ __ cfi().AdjustCFAOffset(extra_slot);
__ movups(Address(ESP, 0), XmmRegister(reg));
ExchangeMemory(0, mem + extra_slot, 4);
__ movups(XmmRegister(reg), Address(ESP, 0));
__ addl(ESP, Immediate(extra_slot));
+ __ cfi().AdjustCFAOffset(-extra_slot);
}
void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 43f5acd..22d8778 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -93,6 +93,29 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};
+class CriticalNativeCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorX86(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorX86() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86);
+};
+
class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86() {}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ec54376..4a0cc78 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86_64.h"
+#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
@@ -978,6 +979,16 @@
UNREACHABLE();
}
+void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
+ // We have to ensure that the native code we call directly (such as @CriticalNative
+ // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
+ // which are non-volatile for ART, but volatile for Native calls. This will ensure
+ // that they are saved in the prologue and properly restored.
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
+ locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
+ }
+}
+
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -998,7 +1009,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -1032,15 +1043,61 @@
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
__ call(&frame_entry_label_);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ subq(CpuRegister(RSP), Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+ // (callee_method + offset_of_jni_entry_point)()
+ __ call(Address(callee_method.AsRegister<CpuRegister>(),
+ ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt8:
+ __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kUint16:
+ __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt16:
+ __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ addq(CpuRegister(RSP), Immediate(out_frame_size));
+ __ cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
// (callee_method + offset_of_quick_compiled_code)()
__ call(Address(callee_method.AsRegister<CpuRegister>(),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86_64PointerSize).SizeValue()));
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
break;
}
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
DCHECK(!IsLeafMethod());
}
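A stand-alone sketch of why the movzx/movsx sequence above is needed (not ART code; the Type enum and the sample value are illustrative): the native ABI only guarantees the low 8 or 16 bits of RAX for narrow return types, so the managed caller widens the value to the 32-bit representation it expects.

#include <cstdint>
#include <cstdio>

enum class Type { kBool, kInt8, kUint16, kInt16, kInt32 };

static uint32_t NormalizeNarrowReturn(Type type, uint64_t raw_rax) {
  switch (type) {
    case Type::kBool:   return static_cast<uint8_t>(raw_rax);                         // movzxb
    case Type::kInt8:   return static_cast<uint32_t>(static_cast<int8_t>(raw_rax));   // movsxb
    case Type::kUint16: return static_cast<uint16_t>(raw_rax);                        // movzxw
    case Type::kInt16:  return static_cast<uint32_t>(static_cast<int16_t>(raw_rax));  // movsxw
    case Type::kInt32:  return static_cast<uint32_t>(raw_rax);  // low 32 bits are already valid
  }
  return 0u;
}

int main() {
  // A native function returning (jbyte) -1 may leave garbage in the upper bits of RAX.
  uint64_t raw_rax = 0xDEADBEEF000000FFu;
  std::printf("%d\n", static_cast<int32_t>(NormalizeNarrowReturn(Type::kInt8, raw_rax)));  // prints -1
  return 0;
}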
@@ -2493,6 +2550,51 @@
return Location::NoLocation();
}
+Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFloatRegistersLength) {
+ location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register RDI
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = Location::RegisterLocation(
+ gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
+ const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
+ // Pass the method in the hidden argument RAX.
+ return Location::RegisterLocation(RAX);
+}
+
void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
// The trampoline uses the same calling convention as dex calling conventions,
// except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
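A stand-alone sketch of the argument placement GetNextLocation() implements for @CriticalNative calls (not ART code; the shorty letters and register names are the assumptions here): integer-class arguments take RDI, RSI, RDX, RCX, R8, R9 in order, floating-point arguments take XMM0-XMM7, and anything left over goes to 8-byte stack slots, i.e. the standard SysV AMD64 convention. The hidden ArtMethod* is passed separately in RAX, as GetMethodLocation() shows.

#include <cstdio>
#include <string>

static const char* const kGprs[] = { "RDI", "RSI", "RDX", "RCX", "R8", "R9" };
static const char* const kFprs[] = { "XMM0", "XMM1", "XMM2", "XMM3",
                                     "XMM4", "XMM5", "XMM6", "XMM7" };

// `shorty` lists only the argument types using JNI shorty letters:
// 'F' and 'D' are floating point, everything else is integer-class.
static void PrintNativeLocations(const std::string& shorty) {
  size_t gpr = 0u, fpr = 0u, stack_offset = 0u;
  for (char c : shorty) {
    bool is_fp = (c == 'F' || c == 'D');
    if (is_fp && fpr < 8u) {
      std::printf("%c -> %s\n", c, kFprs[fpr++]);
    } else if (!is_fp && gpr < 6u) {
      std::printf("%c -> %s\n", c, kGprs[gpr++]);
    } else {
      std::printf("%c -> [sp + %zu]\n", c, stack_offset);
      stack_offset += 8u;  // each stack argument occupies one 8-byte slot
    }
  }
}

int main() {
  PrintNativeLocations("IJFDIIIII");  // the last integer argument spills to the stack
  return 0;
}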
@@ -2514,7 +2616,14 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 01810f4..dcdd632 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -79,6 +79,31 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
+class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorX86_64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // The register allocator does not support adjusting the frame size, so we cannot provide
+ // final locations of stack arguments for register allocation. We ask it for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
+};
+
class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86_64() {}
@@ -609,6 +634,8 @@
void MaybeIncrementHotness(bool is_frame_entry);
+ static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index cd68b2a..60e1279 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1530,8 +1530,8 @@
if (invoke->IsInvokeStaticOrDirect() &&
HInvokeStaticOrDirect::NeedsCurrentMethodInput(
- invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
- DCHECK_EQ(argument_index, invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex());
+ invoke->AsInvokeStaticOrDirect()->GetDispatchInfo())) {
+ DCHECK_EQ(argument_index, invoke->AsInvokeStaticOrDirect()->GetCurrentMethodIndex());
DCHECK(invoke->InputAt(argument_index) == nullptr);
invoke->SetRawInputAt(argument_index, graph_->GetCurrentMethod());
}
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index e24d541..29f815c 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -59,7 +59,12 @@
Location method_loc = MoveArguments(codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
- codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke_->AsInvokeStaticOrDirect();
+ DCHECK_NE(invoke_static_or_direct->GetMethodLoadKind(),
+ HInvokeStaticOrDirect::MethodLoadKind::kRecursive);
+ DCHECK_NE(invoke_static_or_direct->GetCodePtrLocation(),
+ HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative);
+ codegen->GenerateStaticOrDirectCall(invoke_static_or_direct, method_loc, this);
} else {
codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7a0f131..af3fd76 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -398,12 +398,7 @@
locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
- // We have to ensure that the native code doesn't clobber the XMM registers which are
- // non-volatile for ART, but volatile for Native calls. This will ensure that they are
- // saved in the prologue and properly restored.
- for (FloatRegister fp_reg : non_volatile_xmm_regs) {
- locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
- }
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}
static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
@@ -535,12 +530,7 @@
locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
locations->SetOut(Location::FpuRegisterLocation(XMM0));
- // We have to ensure that the native code doesn't clobber the XMM registers which are
- // non-volatile for ART, but volatile for Native calls. This will ensure that they are
- // saved in the prologue and properly restored.
- for (FloatRegister fp_reg : non_volatile_xmm_regs) {
- locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
- }
+ CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e562b87..0eece84 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4604,6 +4604,11 @@
// Recursive call, use local PC-relative call instruction.
kCallSelf,
+ // Use the JNI entrypoint from the ArtMethod*.
+ // Used for @CriticalNative to avoid going through the JNI stub. The call goes through
+ // a special resolution stub if the class is not initialized or no native code is registered.
+ kCallCriticalNative,
+
// Use code pointer from the ArtMethod*.
// Used when we don't know the target code. This is also the last-resort-kind used when
// other kinds are unimplemented or impractical (i.e. slow) on a particular architecture.
@@ -4633,9 +4638,9 @@
: HInvoke(kInvokeStaticOrDirect,
allocator,
number_of_arguments,
- // There is potentially one extra argument for the HCurrentMethod node, and
- // potentially one other if the clinit check is explicit.
- (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+ // There is potentially one extra argument for the HCurrentMethod input,
+ // and one other if the clinit check is explicit. These can be removed later.
+ (NeedsCurrentMethodInput(dispatch_info) ? 1u : 0u) +
(clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
return_type,
dex_pc,
@@ -4649,17 +4654,17 @@
bool IsClonable() const override { return true; }
- void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+ void SetDispatchInfo(DispatchInfo dispatch_info) {
bool had_current_method_input = HasCurrentMethodInput();
- bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+ bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info);
// Using the current method is the default and once we find a better
// method load kind, we should not go back to using the current method.
DCHECK(had_current_method_input || !needs_current_method_input);
if (had_current_method_input && !needs_current_method_input) {
- DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
- RemoveInputAt(GetSpecialInputIndex());
+ DCHECK_EQ(InputAt(GetCurrentMethodIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+ RemoveInputAt(GetCurrentMethodIndex());
}
dispatch_info_ = dispatch_info;
}
@@ -4668,14 +4673,6 @@
return dispatch_info_;
}
- void AddSpecialInput(HInstruction* input) {
- // We allow only one special input.
- DCHECK(!IsStringInit() && !HasCurrentMethodInput());
- DCHECK(InputCount() == GetSpecialInputIndex() ||
- (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
- InsertInputAt(GetSpecialInputIndex(), input);
- }
-
using HInstruction::GetInputRecords; // Keep the const version visible.
ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override {
ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
@@ -4696,7 +4693,7 @@
}
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
- // We access the method via the dex cache so we can't do an implicit null check.
+ // We do not access the method via an object reference, so we cannot do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
}
@@ -4705,14 +4702,6 @@
return GetType() == DataType::Type::kReference && !IsStringInit();
}
- // Get the index of the special input, if any.
- //
- // If the invoke HasCurrentMethodInput(), the "special input" is the current
- // method pointer; otherwise there may be one platform-specific special input,
- // such as PC-relative addressing base.
- uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
- bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
-
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -4724,17 +4713,6 @@
GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo ||
GetMethodLoadKind() == MethodLoadKind::kBssEntry;
}
- bool HasCurrentMethodInput() const {
- // This function can be called only after the invoke has been fully initialized by the builder.
- if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
- DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
- return true;
- } else {
- DCHECK(InputCount() == GetSpecialInputIndex() ||
- !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
- return false;
- }
- }
QuickEntrypointEnum GetStringInitEntryPoint() const {
DCHECK(IsStringInit());
@@ -4761,6 +4739,60 @@
return target_method_;
}
+ // Does this dispatch info require the current method as an input?
+ static bool NeedsCurrentMethodInput(DispatchInfo dispatch_info) {
+ return dispatch_info.method_load_kind == MethodLoadKind::kRecursive ||
+ dispatch_info.method_load_kind == MethodLoadKind::kRuntimeCall ||
+ dispatch_info.code_ptr_location == CodePtrLocation::kCallCriticalNative;
+ }
+
+ // Get the index of the current method input.
+ size_t GetCurrentMethodIndex() const {
+ DCHECK(HasCurrentMethodInput());
+ return GetCurrentMethodIndexUnchecked();
+ }
+ size_t GetCurrentMethodIndexUnchecked() const {
+ return GetNumberOfArguments();
+ }
+
+ // Check whether the invoke has a current method input.
+ bool HasCurrentMethodInput() const {
+ if (NeedsCurrentMethodInput(GetDispatchInfo())) {
+ DCHECK(InputAt(GetCurrentMethodIndexUnchecked()) == nullptr || // During argument setup.
+ InputAt(GetCurrentMethodIndexUnchecked())->IsCurrentMethod());
+ return true;
+ } else {
+ DCHECK(InputCount() == GetCurrentMethodIndexUnchecked() ||
+ InputAt(GetCurrentMethodIndexUnchecked()) == nullptr || // During argument setup.
+ !InputAt(GetCurrentMethodIndexUnchecked())->IsCurrentMethod());
+ return false;
+ }
+ }
+
+ // Get the index of the special input.
+ size_t GetSpecialInputIndex() const {
+ DCHECK(HasSpecialInput());
+ return GetSpecialInputIndexUnchecked();
+ }
+ size_t GetSpecialInputIndexUnchecked() const {
+ return GetNumberOfArguments() + (HasCurrentMethodInput() ? 1u : 0u);
+ }
+
+ // Check whether the invoke has a special input.
+ bool HasSpecialInput() const {
+ size_t other_inputs =
+ GetSpecialInputIndexUnchecked() + (IsStaticWithExplicitClinitCheck() ? 1u : 0u);
+ size_t input_count = InputCount();
+ DCHECK_LE(input_count - other_inputs, 1u) << other_inputs << " " << input_count;
+ return other_inputs != input_count;
+ }
+
+ void AddSpecialInput(HInstruction* input) {
+ // We allow only one special input.
+ DCHECK(!HasSpecialInput());
+ InsertInputAt(GetSpecialInputIndexUnchecked(), input);
+ }
+
// Remove the HClinitCheck or the replacement HLoadClass (set as last input by
// PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
// instruction; only relevant for static calls with explicit clinit check.
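A stand-alone model of the input layout these accessors encode (not ART code): the inputs of an HInvokeStaticOrDirect are the arguments, then the optional current-method input, then the optional special input (for example the X86 PC-relative base), then the optional explicit clinit check.

#include <cassert>
#include <cstddef>
#include <cstdio>

struct InvokeLayout {
  size_t num_arguments;
  bool has_current_method;        // NeedsCurrentMethodInput(dispatch_info)
  bool has_special_input;         // e.g. the X86 PC-relative base
  bool has_explicit_clinit_check;

  // Mirrors GetCurrentMethodIndexUnchecked(): right after the arguments.
  size_t CurrentMethodIndex() const {
    assert(has_current_method);
    return num_arguments;
  }
  // Mirrors GetSpecialInputIndexUnchecked(): after the current method, if present.
  size_t SpecialInputIndex() const {
    assert(has_special_input);
    return num_arguments + (has_current_method ? 1u : 0u);
  }
  size_t InputCount() const {
    return num_arguments + (has_current_method ? 1u : 0u) +
           (has_special_input ? 1u : 0u) + (has_explicit_clinit_check ? 1u : 0u);
  }
};

int main() {
  // A @CriticalNative call keeps its current-method input; a PC-relative base would follow it.
  InvokeLayout layout{/*num_arguments=*/ 3u, /*has_current_method=*/ true,
                      /*has_special_input=*/ true, /*has_explicit_clinit_check=*/ false};
  std::printf("method input at %zu, special input at %zu, %zu inputs total\n",
              layout.CurrentMethodIndex(), layout.SpecialInputIndex(), layout.InputCount());
  return 0;
}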
@@ -4788,11 +4820,6 @@
return IsStatic() && (GetClinitCheckRequirement() == ClinitCheckRequirement::kImplicit);
}
- // Does this method load kind need the current method as an input?
- static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
- return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall;
- }
-
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
protected:
@@ -4815,6 +4842,7 @@
DispatchInfo dispatch_info_;
};
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::CodePtrLocation rhs);
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
class HInvokeVirtual final : public HInvoke {
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 4ff293c..3ea1918 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -195,15 +195,6 @@
void HandleInvoke(HInvoke* invoke) {
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
- // We can't add the method address if we already have a current method pointer.
- // This may arise when sharpening doesn't remove the current method pointer from the invoke.
- if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) {
- // Note: This happens only for recursive calls (including compiling an intrinsic
- // by faking a call to itself; we use kRuntimeCall for this case).
- DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
- return;
- }
-
// If this is an invoke-static/-direct with PC-relative addressing (within boot image
// or using .bss or .data.bimg.rel.ro), we need the PC-relative address base.
bool base_added = false;
@@ -246,7 +237,6 @@
// This intrinsic needs the constant area.
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
- DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
invoke_static_or_direct->AddSpecialInput(method_address);
}
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1539421..04a8eab 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -124,6 +124,13 @@
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
+ if (method_load_kind != HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall &&
+ callee->IsCriticalNative()) {
+ DCHECK_NE(method_load_kind, HInvokeStaticOrDirect::MethodLoadKind::kRecursive);
+ DCHECK(callee->IsStatic());
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative;
+ }
+
if (codegen->GetGraph()->IsDebuggable()) {
// For debuggable apps always use the code pointer from ArtMethod
// so that we don't circumvent instrumentation stubs if installed.
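A stand-alone sketch of the dispatch decision added here (not ART code; the real sharpening pass handles more load kinds and conditions than this model shows): a static @CriticalNative callee whose ArtMethod* can be materialized without a runtime call is dispatched directly through its JNI entrypoint, while debuggable compilation still falls back to the ArtMethod code pointer so instrumentation stubs are not bypassed.

#include <cstdio>

enum class MethodLoadKind { kRecursive, kBootImageLinkTimePcRelative, kBssEntry, kRuntimeCall };
enum class CodePtrLocation { kCallSelf, kCallCriticalNative, kCallArtMethod };

static CodePtrLocation ChooseCodePtrLocation(MethodLoadKind load_kind,
                                             bool is_critical_native,
                                             bool is_debuggable) {
  CodePtrLocation location = (load_kind == MethodLoadKind::kRecursive)
                                 ? CodePtrLocation::kCallSelf
                                 : CodePtrLocation::kCallArtMethod;
  if (load_kind != MethodLoadKind::kRuntimeCall && is_critical_native) {
    // The method pointer is available without a runtime call, so the compiled code
    // can jump straight to the registered native function.
    location = CodePtrLocation::kCallCriticalNative;
  }
  if (is_debuggable) {
    // Debuggable apps always use the code pointer from the ArtMethod so that
    // instrumentation stubs are not circumvented.
    location = CodePtrLocation::kCallArtMethod;
  }
  return location;
}

int main() {
  bool direct = ChooseCodePtrLocation(MethodLoadKind::kBssEntry,
                                      /*is_critical_native=*/ true,
                                      /*is_debuggable=*/ false) ==
                CodePtrLocation::kCallCriticalNative;
  std::printf("@CriticalNative call is direct: %s\n", direct ? "yes" : "no");
  return 0;
}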