Direct calls to @CriticalNative methods.
Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.
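
For reference, a minimal sketch (hypothetical class and method names) of
the kind of method this change targets. @CriticalNative methods must be
static, non-synchronized, and may only take and return primitive types;
the registered native function receives no JNIEnv*/jclass arguments.

    import dalvik.annotation.optimization.CriticalNative;

    class FastOps {
      // Compiled managed callers can now branch straight to the
      // registered native code instead of going through the JNI stub.
      @CriticalNative
      static native int add(int a, int b);
    }
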
Golem results:
art-opt-cc                       x86 x86-64    arm  arm64
NativeDowncallStaticCritical  +12.5% +62.5% +75.9% +41.7%
NativeDowncallStaticCritical6 +55.6% +87.5% +72.1% +35.3%
art-opt                          x86 x86-64    arm  arm64
NativeDowncallStaticCritical  +28.6% +85.6% +76.4% +38.4%
NativeDowncallStaticCritical6 +44.6% +44.6% +74.6% +32.2%

Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4a618de..d108623 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -18,6 +18,7 @@
#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/arm64/jni_frame_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
@@ -870,6 +871,49 @@
return LocationFrom(kArtMethodRegister);
}
+Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFPRegistersLength) {
+ location = LocationFrom(kParameterFPRegisters[fpr_index_]);
+ ++fpr_index_;
+ }
+ } else {
+ // Native ABI uses the same registers as managed, except that the method register x0
+ // is a normal argument.
+ if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
+ location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ if (DataType::Is64BitType(type)) {
+ location = Location::DoubleStackSlot(stack_offset_);
+ } else {
+ location = Location::StackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
+ // We perform conversion to the managed ABI return register after the call if needed.
+ InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
+ // Pass the method in the hidden argument x15.
+ return Location::RegisterLocation(x15.GetCode());
+}
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -4295,7 +4339,13 @@
return;
}
- HandleInvoke(invoke);
+ if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+ } else {
+ HandleInvoke(invoke);
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
@@ -4327,7 +4377,7 @@
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
@@ -4373,6 +4423,19 @@
}
}
+ auto call_code_pointer_member = [&](MemberOffset offset) {
+ // LR = callee_method->member;
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ }
+ };
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
{
@@ -4384,20 +4447,50 @@
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
break;
- case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
- // LR = callee_method->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, MemOperand(
- XRegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- // lr()
- __ blr(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
+ HParallelMove parallel_move(GetGraph()->GetAllocator());
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
+ kAapcs64StackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke, &parallel_move);
+ if (out_frame_size != 0u) {
+ __ Claim(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(out_frame_size);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
+ // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
+ switch (invoke->GetType()) {
+ case DataType::Type::kBool:
+ __ Ubfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kInt8:
+ __ Sbfx(w0, w0, 0, 8);
+ break;
+ case DataType::Type::kUint16:
+ __ Ubfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt16:
+ __ Sbfx(w0, w0, 0, 16);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ case DataType::Type::kVoid:
+ break;
+ default:
+ DCHECK(false) << invoke->GetType();
+ break;
+ }
+ if (out_frame_size != 0u) {
+ __ Drop(out_frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-out_frame_size);
+ }
+ break;
+ }
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
break;
}
@@ -4819,14 +4912,9 @@
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- LocationSummary* locations = invoke->GetLocations();
- codegen_->GenerateStaticOrDirectCall(
- invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
- }
+ LocationSummary* locations = invoke->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}