Direct calls to @CriticalNative methods.
Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.
Golem results:
art-opt-cc                      x86      x86-64   arm      arm64
NativeDowncallStaticCritical    +12.5%   +62.5%   +75.9%   +41.7%
NativeDowncallStaticCritical6   +55.6%   +87.5%   +72.1%   +35.3%

art-opt                         x86      x86-64   arm      arm64
NativeDowncallStaticCritical    +28.6%   +85.6%   +76.4%   +38.4%
NativeDowncallStaticCritical6   +44.6%   +44.6%   +74.6%   +32.2%
Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
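For context, the direct-call optimization leans on the @CriticalNative contract: the
annotated method must be static, may take and return only primitive types, and its
native implementation receives no JNIEnv* or jclass arguments. That is what lets
compiled managed code branch straight to the native function using the target's
native calling convention. A minimal sketch of such a method pair follows; the
NativeMath/add names are illustrative, not part of this change:

  // Java side (illustrative):
  //   import dalvik.annotation.optimization.CriticalNative;
  //
  //   class NativeMath {
  //     @CriticalNative
  //     static native int add(int a, int b);
  //   }

  #include <jni.h>

  // Native side: note the absence of the usual JNIEnv* and jclass
  // parameters; only primitive arguments and results are allowed.
  extern "C" jint Java_NativeMath_add(jint a, jint b) {
    return a + b;
  }

Before this change, such a call still went through a JNI stub that set up the
native frame; with it, the compiled caller places the arguments according to the
native convention (see the visitor added in the diff below) and calls the native
code directly.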
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 01810f4..dcdd632 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -79,6 +79,31 @@
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
+class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorX86_64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
+};
+
class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86_64() {}
@@ -609,6 +634,8 @@
void MaybeIncrementHotness(bool is_frame_entry);
+ static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
static constexpr int32_t kDummy32BitOffset = 256;
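To make the register walk concrete, here is a standalone sketch of what a
GetNextLocation pass over the native System V AMD64 convention can look like:
up to six integer arguments in GPRs (RDI, RSI, RDX, RCX, R8, R9), up to eight
floating-point arguments in XMM0-XMM7, and 8-byte stack slots beyond that. The
Type and Loc names are simplified stand-ins for ART's DataType::Type and
Location, so treat this as an illustration of the technique, not the real
implementation:

  #include <cstddef>

  // Simplified stand-ins for ART's DataType::Type and Location.
  enum class Type { kInt32, kInt64, kFloat32, kFloat64 };

  struct Loc {
    enum Kind { kGpr, kFpr, kStack, kAny } kind;
    size_t index;  // Register number, or stack offset in bytes.
  };

  class CriticalNativeConventionSketch {
   public:
    explicit CriticalNativeConventionSketch(bool for_register_allocation)
        : for_register_allocation_(for_register_allocation) {}

    // System V AMD64: up to 6 integer args in GPRs and 8 FP args in
    // XMM registers; everything else goes to 8-byte stack slots.
    Loc GetNextLocation(Type type) {
      const bool is_fp = (type == Type::kFloat32 || type == Type::kFloat64);
      if (is_fp && fpr_index_ < 8u) {
        return {Loc::kFpr, fpr_index_++};
      }
      if (!is_fp && gpr_index_ < 6u) {
        return {Loc::kGpr, gpr_index_++};
      }
      const size_t offset = stack_offset_;
      stack_offset_ += 8u;
      // Mirrors the comment in the class above: the register allocator
      // cannot adjust the frame size, so during allocation we report an
      // unconstrained location and move the value into its stack slot
      // only when the call itself is generated.
      return for_register_allocation_ ? Loc{Loc::kAny, 0u}
                                      : Loc{Loc::kStack, offset};
    }

    size_t GetStackOffset() const { return stack_offset_; }

   private:
    const bool for_register_allocation_;
    size_t gpr_index_ = 0u;
    size_t fpr_index_ = 0u;
    size_t stack_offset_ = 0u;
  };

A plausible reading of the BlockNonVolatileXmmRegisters declaration in the second
hunk: the System V AMD64 ABI has no callee-saved XMM registers, so any XMM
registers that ART's managed convention treats as callee-saved cannot be assumed
to survive the direct native call and must be blocked across it.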