author     | 2019-12-10 10:17:23 +0000
committer  | 2019-12-18 06:50:39 +0000
commit     | 00391824f4ee89f9fbed178a1ee32bc29fa77b3b (patch)
tree       | aea6bc5e49801c5b4816257ab16a97181ef0d911
parent     | 001e5b33ba7065dde0b85450830b605733ae1685 (diff)
Add an implementation of Nterp for x64.
Enable it on x64 when the runtime and ArtMethod requirements are met
(see nterp.cc).
Test: test.py
Bug: 112676029
Change-Id: I772cd20a20fdc0ff99529df7495801d773091584
29 files changed, 3799 insertions, 34 deletions
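Before the diff itself, here is a minimal, self-contained C++ sketch of the entry-point selection order this change introduces. It is not ART code: MethodInfo, EntryPoint, and SelectEntryPoint are illustrative placeholders, and only the ordering (AOT code, then JIT code, then nterp, then generic JNI or the switch-interpreter bridge) and the nterp gating conditions are taken from the class_linker.cc, nterp.cc, and mterp.h hunks below.

    // Hypothetical sketch; names are placeholders, not ART symbols.
    #include <cstddef>

    enum class EntryPoint { kAotCode, kJitCode, kNterp, kGenericJni, kInterpreterBridge };

    struct MethodInfo {
      bool has_aot_code;
      bool has_jit_code;
      bool is_native;
      bool skip_access_checks;
      bool standard_dex;
      std::size_t frame_size;
    };

    // Mirrors kMaxNterpFrame = 3 * KB added to mterp.h in this change.
    constexpr std::size_t kMaxNterpFrame = 3 * 1024;

    // Mirrors interpreter::CanMethodUseNterp() from nterp.cc below.
    bool CanMethodUseNterp(const MethodInfo& m) {
      return m.skip_access_checks && !m.is_native && m.standard_dex &&
             m.frame_size < kMaxNterpFrame;
    }

    // Mirrors the ordering added to ClassLinker::FixupStaticTrampolines() below:
    // 1) AOT code, 2) JIT code, 3) nterp, then generic JNI for native methods,
    // otherwise the quick-to-interpreter bridge.
    EntryPoint SelectEntryPoint(const MethodInfo& m, bool runtime_can_use_nterp) {
      if (m.has_aot_code) return EntryPoint::kAotCode;
      if (m.has_jit_code) return EntryPoint::kJitCode;
      if (runtime_can_use_nterp && CanMethodUseNterp(m)) return EntryPoint::kNterp;
      if (m.is_native) return EntryPoint::kGenericJni;
      return EntryPoint::kInterpreterBridge;
    }

The same gating appears in ClassLinker::GetQuickOatCodeFor() and in EnsureSkipAccessChecksMethods() in the diff: when CanRuntimeUseNterp() and CanMethodUseNterp() both hold, the nterp entry point from GetNterpEntryPoint() is preferred over the quick-to-interpreter bridge.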
diff --git a/runtime/Android.bp b/runtime/Android.bp index baa921dc5b..e09d828838 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -344,8 +344,9 @@ libart_cc_defaults { // Note that the fault_handler_x86.cc is not a mistake. This file is // shared between the x86 and x86_64 architectures. "interpreter/mterp/mterp.cc", - "interpreter/mterp/nterp_stub.cc", + "interpreter/mterp/nterp.cc", ":libart_mterp.x86_64", + ":libart_mterp.x86_64ng", "arch/x86_64/context_x86_64.cc", "arch/x86_64/entrypoints_init_x86_64.cc", "arch/x86_64/jni_entrypoints_x86_64.S", @@ -806,3 +807,16 @@ genrule { ], cmd: "$(location interpreter/mterp/gen_mterp.py) $(out) $(in)", } + +genrule { + name: "libart_mterp.x86_64ng", + out: ["mterp_x86_64ng.S"], + srcs: ["interpreter/mterp/x86_64ng/*.S", + "interpreter/mterp/x86_64/arithmetic.S", + "interpreter/mterp/x86_64/floating_point.S"], + tool_files: [ + "interpreter/mterp/gen_mterp.py", + "interpreter/mterp/common/gen_setup.py", + ], + cmd: "$(location interpreter/mterp/gen_mterp.py) $(out) $(in)", +} diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index 596e468830..2b50cdb77d 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -82,6 +82,7 @@ // The restored CFA state should match the CFA state during CFI_REMEMBER_STATE. // `objdump -Wf libart.so | egrep "_cfa|_state"` is useful to audit the opcodes. #define CFI_RESTORE_STATE_AND_DEF_CFA(reg,off) .cfi_restore_state .cfi_def_cfa reg,off + #define CFI_RESTORE_STATE .cfi_restore_state #else // Mac OS' doesn't like cfi_* directives. #define CFI_STARTPROC @@ -93,6 +94,7 @@ #define CFI_REL_OFFSET(reg,size) #define CFI_REMEMBER_STATE #define CFI_RESTORE_STATE_AND_DEF_CFA(off) + #define CFI_RESTORE_STATE #endif // Symbols. diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index ab38614c98..1e2658cb95 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -45,6 +45,10 @@ class X86_64Context final : public Context { rip_ = new_pc; } + void SetNterpDexPC(uintptr_t dex_pc_ptr) override { + SetGPR(R12, dex_pc_ptr); + } + void SetArg0(uintptr_t new_arg0_value) override { SetGPR(RDI, new_arg0_value); } diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index a0a2365408..ffa772e6c5 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -143,6 +143,8 @@ #include "verifier/class_verifier.h" #include "well_known_classes.h" +#include "interpreter/interpreter_mterp_impl.h" + namespace art { using android::base::StringPrintf; @@ -224,16 +226,25 @@ static void HandleEarlierVerifyError(Thread* self, // Ensures that methods have the kAccSkipAccessChecks bit set. We use the // kAccVerificationAttempted bit on the class access flags to determine whether this has been done // before. -template <bool kNeedsVerified = false> static void EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass, PointerSize pointer_size) REQUIRES_SHARED(Locks::mutator_lock_) { - if (kNeedsVerified) { - // To not fail access-flags access checks, push a minimal state. 
- mirror::Class::SetStatus(klass, ClassStatus::kVerified, Thread::Current()); - } + Runtime* runtime = Runtime::Current(); + ClassLinker* class_linker = runtime->GetClassLinker(); if (!klass->WasVerificationAttempted()) { klass->SetSkipAccessChecksFlagOnAllMethods(pointer_size); klass->SetVerificationAttempted(); + // Now that the class has passed verification, try to set nterp entrypoints + // to methods that currently use the switch interpreter. + if (interpreter::CanRuntimeUseNterp()) { + for (ArtMethod& m : klass->GetMethods(pointer_size)) { + if (class_linker->IsQuickToInterpreterBridge(m.GetEntryPointFromQuickCompiledCode()) && + interpreter::CanMethodUseNterp(&m)) { + if (klass->IsVisiblyInitialized() || !NeedsClinitCheckBeforeCall(&m)) { + runtime->GetInstrumentation()->UpdateMethodsCode(&m, interpreter::GetNterpEntryPoint()); + } + } + } + } } } @@ -3681,6 +3692,11 @@ const void* ClassLinker::GetQuickOatCodeFor(ArtMethod* method) { // No code and native? Use generic trampoline. return GetQuickGenericJniStub(); } + + if (interpreter::CanRuntimeUseNterp() && interpreter::CanMethodUseNterp(method)) { + return interpreter::GetNterpEntryPoint(); + } + return GetQuickToInterpreterBridge(); } @@ -3778,27 +3794,41 @@ void ClassLinker::FixupStaticTrampolines(ObjPtr<mirror::Class> klass) { // Only update static methods. continue; } - if (!IsQuickResolutionStub(method->GetEntryPointFromQuickCompiledCode())) { - // Only update methods whose entrypoint is the resolution stub. - continue; - } const void* quick_code = nullptr; + + // In order: + // 1) Check if we have AOT Code. + // 2) Check if we have JIT Code. + // 3) Check if we can use Nterp. if (has_oat_class) { OatFile::OatMethod oat_method = oat_class.GetOatMethod(method_index); quick_code = oat_method.GetQuickCode(); } - // Check if we have JIT compiled code for it. + jit::Jit* jit = runtime->GetJit(); if (quick_code == nullptr && jit != nullptr) { quick_code = jit->GetCodeCache()->GetSavedEntryPointOfPreCompiledMethod(method); } + + if (quick_code == nullptr && + interpreter::CanRuntimeUseNterp() && + interpreter::CanMethodUseNterp(method)) { + quick_code = interpreter::GetNterpEntryPoint(); + } + // Check whether the method is native, in which case it's generic JNI. if (quick_code == nullptr && method->IsNative()) { quick_code = GetQuickGenericJniStub(); } else if (ShouldUseInterpreterEntrypoint(method, quick_code)) { // Use interpreter entry point. + if (IsQuickToInterpreterBridge(method->GetEntryPointFromQuickCompiledCode())) { + // If we have the trampoline or the bridge already, no need to update. + // This saves in not dirtying boot image memory. + continue; + } quick_code = GetQuickToInterpreterBridge(); } + CHECK(quick_code != nullptr); runtime->GetInstrumentation()->UpdateMethodsCode(method, quick_code); } // Ignore virtual methods on the iterator. diff --git a/runtime/interpreter/cfi_asm_support.h b/runtime/interpreter/cfi_asm_support.h index c1e5fb5615..04812e1e7f 100644 --- a/runtime/interpreter/cfi_asm_support.h +++ b/runtime/interpreter/cfi_asm_support.h @@ -44,9 +44,16 @@ 0x0c /* DW_OP_const4u */, 0x44, 0x45, 0x58, 0x31, /* magic = "DEX1" */ \ 0x13 /* DW_OP_drop */, \ 0x92 /* DW_OP_bregx */, dexReg, (dexOffset & 0x7F) /* 1-byte SLEB128 */ + + #define CFI_DEFINE_CFA_DEREF(reg, offset, size) .cfi_escape \ + 0x0f /* DW_CFA_expression */, 6 /* size */, \ + 0x92 /* bregx */, reg, (offset & 0x7F), \ + 0x06 /* DW_OP_DEREF */, \ + 0x23 /* DW_OP_plus_uconst */, size #else // Mac OS doesn't like cfi_* directives. 
#define CFI_DEFINE_DEX_PC_WITH_OFFSET(tmpReg, dexReg, dexOffset) + #define CFI_DEFINE_CFA_DEREF(reg, offset) #endif #endif // ART_RUNTIME_INTERPRETER_CFI_ASM_SUPPORT_H_ diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 9fccde9d2d..d7fee020a2 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -245,13 +245,14 @@ static ALWAYS_INLINE bool DoInvoke(Thread* self, const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c(); ArtMethod* sf_method = shadow_frame.GetMethod(); - // Try to find the method in small thread-local cache first. + // Try to find the method in small thread-local cache first (only used when + // nterp is not used as mterp and nterp use the cache in an incompatible way). InterpreterCache* tls_cache = self->GetInterpreterCache(); size_t tls_value; ArtMethod* resolved_method; if (is_quick) { resolved_method = nullptr; // We don't know/care what the original method was. - } else if (LIKELY(tls_cache->Get(inst, &tls_value))) { + } else if (!IsNterpSupported() && LIKELY(tls_cache->Get(inst, &tls_value))) { resolved_method = reinterpret_cast<ArtMethod*>(tls_value); } else { ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); @@ -264,7 +265,9 @@ static ALWAYS_INLINE bool DoInvoke(Thread* self, result->SetJ(0); return false; } - tls_cache->Set(inst, reinterpret_cast<size_t>(resolved_method)); + if (!IsNterpSupported()) { + tls_cache->Set(inst, reinterpret_cast<size_t>(resolved_method)); + } } // Null pointer check and virtual method resolution. diff --git a/runtime/interpreter/mterp/mterp.h b/runtime/interpreter/mterp/mterp.h index dfbba29952..7813fca3b7 100644 --- a/runtime/interpreter/mterp/mterp.h +++ b/runtime/interpreter/mterp/mterp.h @@ -20,6 +20,8 @@ #include <cstddef> #include <cstdint> +#include "base/globals.h" + /* * Mterp assembly handler bases */ @@ -31,6 +33,7 @@ extern "C" void* artNterpAsmInstructionEnd[]; namespace art { +class ArtMethod; class Thread; namespace interpreter { @@ -40,6 +43,8 @@ void CheckMterpAsmConstants(); void CheckNterpAsmConstants(); bool CanUseMterp(); bool IsNterpSupported(); +bool CanRuntimeUseNterp(); +bool CanMethodUseNterp(ArtMethod* method); const void* GetNterpEntryPoint(); // Poison value for TestExportPC. If we segfault with this value, it means that a mterp @@ -51,6 +56,9 @@ constexpr bool kTestExportPC = false; constexpr size_t kMterpHandlerSize = 128; +// The maximum we will allow an nterp frame to be. +constexpr size_t kMaxNterpFrame = 3 * KB; + } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/mterp/nterp.cc b/runtime/interpreter/mterp/nterp.cc new file mode 100644 index 0000000000..1e52492058 --- /dev/null +++ b/runtime/interpreter/mterp/nterp.cc @@ -0,0 +1,598 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Mterp entry point and support functions. 
+ */ +#include "mterp.h" + +#include "base/quasi_atomic.h" +#include "dex/dex_instruction_utils.h" +#include "debugger.h" +#include "entrypoints/entrypoint_utils-inl.h" +#include "interpreter/interpreter_common.h" +#include "interpreter/interpreter_intrinsics.h" +#include "interpreter/shadow_frame-inl.h" +#include "mirror/string-alloc-inl.h" +#include "nterp_helpers.h" + +namespace art { +namespace interpreter { + +bool IsNterpSupported() { + return !kPoisonHeapReferences && kUseReadBarrier; +} + +bool CanRuntimeUseNterp() REQUIRES_SHARED(Locks::mutator_lock_) { + // Nterp has the same restrictions as Mterp. + return CanUseMterp(); +} + +bool CanMethodUseNterp(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { + return method->SkipAccessChecks() && + !method->IsNative() && + method->GetDexFile()->IsStandardDexFile() && + NterpGetFrameSize(method) < kMaxNterpFrame; +} + +const void* GetNterpEntryPoint() { + return reinterpret_cast<const void*>(interpreter::ExecuteNterpImpl); +} + +/* + * Verify some constants used by the nterp interpreter. + */ +void CheckNterpAsmConstants() { + /* + * If we're using computed goto instruction transitions, make sure + * none of the handlers overflows the byte limit. This won't tell + * which one did, but if any one is too big the total size will + * overflow. + */ + const int width = kMterpHandlerSize; + ptrdiff_t interp_size = reinterpret_cast<uintptr_t>(artNterpAsmInstructionEnd) - + reinterpret_cast<uintptr_t>(artNterpAsmInstructionStart); + if ((interp_size == 0) || (interp_size != (art::kNumPackedOpcodes * width))) { + LOG(FATAL) << "ERROR: unexpected asm interp size " << interp_size + << "(did an instruction handler exceed " << width << " bytes?)"; + } +} + +template<typename T> +inline void UpdateCache(Thread* self, uint16_t* dex_pc_ptr, T value) { + DCHECK(kUseReadBarrier) << "Nterp only works with read barriers"; + // For simplicity, only update the cache if weak ref accesses are enabled. If + // they are disabled, this means the GC is processing the cache, and is + // reading it concurrently. + if (self->GetWeakRefAccessEnabled()) { + self->GetInterpreterCache()->Set(dex_pc_ptr, value); + } +} + +template<typename T> +inline void UpdateCache(Thread* self, uint16_t* dex_pc_ptr, T* value) { + UpdateCache(self, dex_pc_ptr, reinterpret_cast<size_t>(value)); +} + +extern "C" const dex::CodeItem* NterpGetCodeItem(ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + return method->GetCodeItem(); +} + +extern "C" const char* NterpGetShorty(ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + return method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(); +} + +extern "C" const char* NterpGetShortyFromMethodId(ArtMethod* caller, uint32_t method_index) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + return caller->GetDexFile()->GetMethodShorty(method_index); +} + +extern "C" const char* NterpGetShortyFromInvokePolymorphic(ArtMethod* caller, uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + const Instruction* inst = Instruction::At(dex_pc_ptr); + dex::ProtoIndex proto_idx(inst->Opcode() == Instruction::INVOKE_POLYMORPHIC + ? 
inst->VRegH_45cc() + : inst->VRegH_4rcc()); + return caller->GetDexFile()->GetShorty(proto_idx); +} + +extern "C" const char* NterpGetShortyFromInvokeCustom(ArtMethod* caller, uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + const Instruction* inst = Instruction::At(dex_pc_ptr); + uint16_t call_site_index = (inst->Opcode() == Instruction::INVOKE_CUSTOM + ? inst->VRegB_35c() + : inst->VRegB_3rc()); + const DexFile* dex_file = caller->GetDexFile(); + dex::ProtoIndex proto_idx = dex_file->GetProtoIndexForCallSite(call_site_index); + return dex_file->GetShorty(proto_idx); +} + +extern "C" size_t NterpGetMethod(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + InvokeType invoke_type = kStatic; + uint16_t method_index = 0; + switch (inst->Opcode()) { + case Instruction::INVOKE_DIRECT: { + method_index = inst->VRegB_35c(); + invoke_type = kDirect; + break; + } + + case Instruction::INVOKE_INTERFACE: { + method_index = inst->VRegB_35c(); + invoke_type = kInterface; + break; + } + + case Instruction::INVOKE_STATIC: { + method_index = inst->VRegB_35c(); + invoke_type = kStatic; + break; + } + + case Instruction::INVOKE_SUPER: { + method_index = inst->VRegB_35c(); + invoke_type = kSuper; + break; + } + case Instruction::INVOKE_VIRTUAL: { + method_index = inst->VRegB_35c(); + invoke_type = kVirtual; + break; + } + + case Instruction::INVOKE_DIRECT_RANGE: { + method_index = inst->VRegB_3rc(); + invoke_type = kDirect; + break; + } + + case Instruction::INVOKE_INTERFACE_RANGE: { + method_index = inst->VRegB_3rc(); + invoke_type = kInterface; + break; + } + + case Instruction::INVOKE_STATIC_RANGE: { + method_index = inst->VRegB_3rc(); + invoke_type = kStatic; + break; + } + + case Instruction::INVOKE_SUPER_RANGE: { + method_index = inst->VRegB_3rc(); + invoke_type = kSuper; + break; + } + + case Instruction::INVOKE_VIRTUAL_RANGE: { + method_index = inst->VRegB_3rc(); + invoke_type = kVirtual; + break; + } + + default: + LOG(FATAL) << "Unknown instruction " << inst->Opcode(); + } + + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + ArtMethod* resolved_method = caller->SkipAccessChecks() + ? class_linker->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( + self, method_index, caller, invoke_type) + : class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( + self, method_index, caller, invoke_type); + if (resolved_method == nullptr) { + DCHECK(self->IsExceptionPending()); + return 0; + } + + // ResolveMethod returns the method based on the method_id. For super invokes + // we must use the executing class's context to find the right method. + if (invoke_type == kSuper) { + ObjPtr<mirror::Class> executing_class = caller->GetDeclaringClass(); + ObjPtr<mirror::Class> referenced_class = class_linker->LookupResolvedType( + executing_class->GetDexFile().GetMethodId(method_index).class_idx_, + executing_class->GetDexCache(), + executing_class->GetClassLoader()); + DCHECK(referenced_class != nullptr); // We have already resolved a method from this class. + if (!referenced_class->IsAssignableFrom(executing_class)) { + // We cannot determine the target method. 
+ ThrowNoSuchMethodError(invoke_type, + resolved_method->GetDeclaringClass(), + resolved_method->GetName(), + resolved_method->GetSignature()); + return 0; + } + if (referenced_class->IsInterface()) { + resolved_method = referenced_class->FindVirtualMethodForInterfaceSuper( + resolved_method, class_linker->GetImagePointerSize()); + } else { + uint16_t vtable_index = resolved_method->GetMethodIndex(); + ObjPtr<mirror::Class> super_class = executing_class->GetSuperClass(); + if (super_class == nullptr || + !super_class->HasVTable() || + vtable_index >= static_cast<uint32_t>(super_class->GetVTableLength())) { + // Behavior to agree with that of the verifier. + ThrowNoSuchMethodError(invoke_type, + resolved_method->GetDeclaringClass(), + resolved_method->GetName(), + resolved_method->GetSignature()); + return 0; + } else { + resolved_method = executing_class->GetSuperClass()->GetVTableEntry( + vtable_index, class_linker->GetImagePointerSize()); + } + } + } + + if (invoke_type == kInterface) { + UpdateCache(self, dex_pc_ptr, resolved_method->GetImtIndex()); + return resolved_method->GetImtIndex(); + } else if (resolved_method->GetDeclaringClass()->IsStringClass() + && !resolved_method->IsStatic() + && resolved_method->IsConstructor()) { + resolved_method = WellKnownClasses::StringInitToStringFactory(resolved_method); + // Or the result with 1 to notify to nterp this is a string init method. We + // also don't cache the result as we don't want nterp to have its fast path always + // check for it, and we expect a lot more regular calls than string init + // calls. + return reinterpret_cast<size_t>(resolved_method) | 1; + } else if (invoke_type == kVirtual) { + UpdateCache(self, dex_pc_ptr, resolved_method->GetMethodIndex()); + return resolved_method->GetMethodIndex(); + } else { + UpdateCache(self, dex_pc_ptr, resolved_method); + return reinterpret_cast<size_t>(resolved_method); + } +} + +static ArtField* ResolveFieldWithAccessChecks(Thread* self, + ClassLinker* class_linker, + uint16_t field_index, + ArtMethod* caller, + bool is_static, + bool is_put) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (caller->SkipAccessChecks()) { + return class_linker->ResolveField(field_index, caller, is_static); + } + + caller = caller->GetInterfaceMethodIfProxy(kRuntimePointerSize); + + StackHandleScope<2> hs(self); + Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(caller->GetDexCache())); + Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(caller->GetClassLoader())); + + ArtField* resolved_field = class_linker->ResolveFieldJLS(field_index, + h_dex_cache, + h_class_loader); + if (resolved_field == nullptr) { + return nullptr; + } + + ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass(); + if (UNLIKELY(resolved_field->IsStatic() != is_static)) { + ThrowIncompatibleClassChangeErrorField(resolved_field, is_static, caller); + return nullptr; + } + ObjPtr<mirror::Class> referring_class = caller->GetDeclaringClass(); + if (UNLIKELY(!referring_class->CheckResolvedFieldAccess(fields_class, + resolved_field, + caller->GetDexCache(), + field_index))) { + return nullptr; + } + if (UNLIKELY(is_put && resolved_field->IsFinal() && (fields_class != referring_class))) { + ThrowIllegalAccessErrorFinalField(caller, resolved_field); + return nullptr; + } + return resolved_field; +} + +extern "C" size_t NterpGetStaticField(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + uint16_t field_index = 
inst->VRegB_21c(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = ResolveFieldWithAccessChecks( + self, + class_linker, + field_index, + caller, + /* is_static */ true, + /* is_put */ IsInstructionSPut(inst->Opcode())); + + if (resolved_field == nullptr) { + DCHECK(self->IsExceptionPending()); + return 0; + } + if (UNLIKELY(!resolved_field->GetDeclaringClass()->IsVisiblyInitialized())) { + StackHandleScope<1> hs(self); + Handle<mirror::Class> h_class(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (UNLIKELY(!class_linker->EnsureInitialized( + self, h_class, /*can_init_fields=*/ true, /*can_init_parents=*/ true))) { + DCHECK(self->IsExceptionPending()); + return 0; + } + DCHECK(h_class->IsInitializing()); + } + if (resolved_field->IsVolatile()) { + // Or the result with 1 to notify to nterp this is a volatile field. We + // also don't cache the result as we don't want nterp to have its fast path always + // check for it. + return reinterpret_cast<size_t>(resolved_field) | 1; + } else { + UpdateCache(self, dex_pc_ptr, resolved_field); + return reinterpret_cast<size_t>(resolved_field); + } +} + +extern "C" uint32_t NterpGetInstanceFieldOffset(Thread* self, + ArtMethod* caller, + uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + uint16_t field_index = inst->VRegC_22c(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = ResolveFieldWithAccessChecks( + self, + class_linker, + field_index, + caller, + /* is_static */ false, + /* is_put */ IsInstructionIPut(inst->Opcode())); + if (resolved_field == nullptr) { + DCHECK(self->IsExceptionPending()); + return 0; + } + if (resolved_field->IsVolatile()) { + // Don't cache for a volatile field, and return a negative offset as marker + // of volatile. + return -resolved_field->GetOffset().Uint32Value(); + } + UpdateCache(self, dex_pc_ptr, resolved_field->GetOffset().Uint32Value()); + return resolved_field->GetOffset().Uint32Value(); +} + +extern "C" mirror::Object* NterpGetClassOrAllocateObject(Thread* self, + ArtMethod* caller, + uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + dex::TypeIndex index; + switch (inst->Opcode()) { + case Instruction::NEW_INSTANCE: + index = dex::TypeIndex(inst->VRegB_21c()); + break; + case Instruction::CHECK_CAST: + index = dex::TypeIndex(inst->VRegB_21c()); + break; + case Instruction::INSTANCE_OF: + index = dex::TypeIndex(inst->VRegC_22c()); + break; + case Instruction::CONST_CLASS: + index = dex::TypeIndex(inst->VRegB_21c()); + break; + case Instruction::NEW_ARRAY: + index = dex::TypeIndex(inst->VRegC_22c()); + break; + default: + LOG(FATAL) << "Unreachable"; + } + ObjPtr<mirror::Class> c = + ResolveVerifyAndClinit(index, + caller, + self, + /* can_run_clinit= */ false, + /* verify_access= */ !caller->SkipAccessChecks()); + if (c == nullptr) { + DCHECK(self->IsExceptionPending()); + return nullptr; + } + + if (inst->Opcode() == Instruction::NEW_INSTANCE) { + gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); + if (UNLIKELY(c->IsStringClass())) { + // We don't cache the class for strings as we need to special case their + // allocation. + return mirror::String::AllocEmptyString(self, allocator_type).Ptr(); + } else { + if (!c->IsFinalizable() && c->IsInstantiable()) { + // Cache non-finalizable classes for next calls. 
+ UpdateCache(self, dex_pc_ptr, c.Ptr()); + } + return AllocObjectFromCode(c, self, allocator_type).Ptr(); + } + } else { + // For all other cases, cache the class. + UpdateCache(self, dex_pc_ptr, c.Ptr()); + } + return c.Ptr(); +} + +extern "C" mirror::Object* NterpLoadObject(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + switch (inst->Opcode()) { + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: { + dex::StringIndex string_index( + (inst->Opcode() == Instruction::CONST_STRING) + ? inst->VRegB_21c() + : inst->VRegB_31c()); + ObjPtr<mirror::String> str = class_linker->ResolveString(string_index, caller); + if (str == nullptr) { + DCHECK(self->IsExceptionPending()); + return nullptr; + } + UpdateCache(self, dex_pc_ptr, str.Ptr()); + return str.Ptr(); + } + case Instruction::CONST_METHOD_HANDLE: { + // Don't cache: we don't expect this to be performance sensitive, and we + // don't want the cache to conflict with a performance sensitive entry. + return class_linker->ResolveMethodHandle(self, inst->VRegB_21c(), caller).Ptr(); + } + case Instruction::CONST_METHOD_TYPE: { + // Don't cache: we don't expect this to be performance sensitive, and we + // don't want the cache to conflict with a performance sensitive entry. + return class_linker->ResolveMethodType( + self, dex::ProtoIndex(inst->VRegB_21c()), caller).Ptr(); + } + default: + LOG(FATAL) << "Unreachable"; + } + return nullptr; +} + +extern "C" void NterpUnimplemented() { + LOG(FATAL) << "Unimplemented"; +} + +static mirror::Object* DoFilledNewArray(Thread* self, + ArtMethod* caller, + uint16_t* dex_pc_ptr, + int32_t* regs, + bool is_range) + REQUIRES_SHARED(Locks::mutator_lock_) { + const Instruction* inst = Instruction::At(dex_pc_ptr); + if (kIsDebugBuild) { + if (is_range) { + DCHECK_EQ(inst->Opcode(), Instruction::FILLED_NEW_ARRAY_RANGE); + } else { + DCHECK_EQ(inst->Opcode(), Instruction::FILLED_NEW_ARRAY); + } + } + const int32_t length = is_range ? inst->VRegA_3rc() : inst->VRegA_35c(); + DCHECK_GE(length, 0); + if (!is_range) { + // Checks FILLED_NEW_ARRAY's length does not exceed 5 arguments. + DCHECK_LE(length, 5); + } + uint16_t type_idx = is_range ? 
inst->VRegB_3rc() : inst->VRegB_35c(); + ObjPtr<mirror::Class> array_class = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), + caller, + self, + /* can_run_clinit= */ true, + /* verify_access= */ false); + if (UNLIKELY(array_class == nullptr)) { + DCHECK(self->IsExceptionPending()); + return nullptr; + } + DCHECK(array_class->IsArrayClass()); + ObjPtr<mirror::Class> component_class = array_class->GetComponentType(); + const bool is_primitive_int_component = component_class->IsPrimitiveInt(); + if (UNLIKELY(component_class->IsPrimitive() && !is_primitive_int_component)) { + if (component_class->IsPrimitiveLong() || component_class->IsPrimitiveDouble()) { + ThrowRuntimeException("Bad filled array request for type %s", + component_class->PrettyDescriptor().c_str()); + } else { + self->ThrowNewExceptionF( + "Ljava/lang/InternalError;", + "Found type %s; filled-new-array not implemented for anything but 'int'", + component_class->PrettyDescriptor().c_str()); + } + return nullptr; + } + ObjPtr<mirror::Object> new_array = mirror::Array::Alloc( + self, + array_class, + length, + array_class->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator()); + if (UNLIKELY(new_array == nullptr)) { + self->AssertPendingOOMException(); + return nullptr; + } + uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in filled-new-array. + uint32_t vregC = 0; // only used in filled-new-array-range. + if (is_range) { + vregC = inst->VRegC_3rc(); + } else { + inst->GetVarArgs(arg); + } + for (int32_t i = 0; i < length; ++i) { + size_t src_reg = is_range ? vregC + i : arg[i]; + if (is_primitive_int_component) { + new_array->AsIntArray()->SetWithoutChecks</* kTransactionActive= */ false>(i, regs[src_reg]); + } else { + new_array->AsObjectArray<mirror::Object>()->SetWithoutChecks</* kTransactionActive= */ false>( + i, reinterpret_cast<mirror::Object*>(regs[src_reg])); + } + } + return new_array.Ptr(); +} + +extern "C" mirror::Object* NterpFilledNewArray(Thread* self, + ArtMethod* caller, + int32_t* registers, + uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + return DoFilledNewArray(self, caller, dex_pc_ptr, registers, /* is_range= */ false); +} + +extern "C" mirror::Object* NterpFilledNewArrayRange(Thread* self, + ArtMethod* caller, + int32_t* registers, + uint16_t* dex_pc_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + return DoFilledNewArray(self, caller, dex_pc_ptr, registers, /* is_range= */ true); +} + +extern "C" jit::OsrData* NterpHotMethod(ArtMethod* method, uint16_t* dex_pc_ptr, uint32_t* vregs) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedAssertNoThreadSuspension sants("In nterp"); + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr) { + // Nterp passes null on entry where we don't want to OSR. + if (dex_pc_ptr != nullptr) { + // This could be a loop back edge, check if we can OSR. 
+ CodeItemInstructionAccessor accessor(method->DexInstructions()); + uint32_t dex_pc = dex_pc_ptr - accessor.Insns(); + jit::OsrData* osr_data = jit->PrepareForOsr( + method->GetInterfaceMethodIfProxy(kRuntimePointerSize), dex_pc, vregs); + if (osr_data != nullptr) { + return osr_data; + } + } + jit->EnqueueCompilationFromNterp(method, Thread::Current()); + } + return nullptr; +} + +extern "C" ssize_t MterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal); +extern "C" ssize_t NterpDoPackedSwitch(const uint16_t* switchData, int32_t testVal) + REQUIRES_SHARED(Locks::mutator_lock_) { + return MterpDoPackedSwitch(switchData, testVal); +} + +extern "C" ssize_t MterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal); +extern "C" ssize_t NterpDoSparseSwitch(const uint16_t* switchData, int32_t testVal) + REQUIRES_SHARED(Locks::mutator_lock_) { + return MterpDoSparseSwitch(switchData, testVal); +} + +} // namespace interpreter +} // namespace art diff --git a/runtime/interpreter/mterp/nterp_stub.cc b/runtime/interpreter/mterp/nterp_stub.cc index e77f0e3389..c1b1ec351a 100644 --- a/runtime/interpreter/mterp/nterp_stub.cc +++ b/runtime/interpreter/mterp/nterp_stub.cc @@ -21,23 +21,34 @@ */ namespace art { + +class ArtMethod; + namespace interpreter { bool IsNterpSupported() { return false; } -void CheckNterpAsmConstants() { +bool CanRuntimeUseNterp() { + return false; } -extern "C" void ExecuteNterpImpl() REQUIRES_SHARED(Locks::mutator_lock_) { - UNIMPLEMENTED(FATAL); +bool CanMethodUseNterp(ArtMethod* method ATTRIBUTE_UNUSED) { + return false; } const void* GetNterpEntryPoint() { return nullptr; } +void CheckNterpAsmConstants() { +} + +extern "C" void ExecuteNterpImpl() REQUIRES_SHARED(Locks::mutator_lock_) { + UNIMPLEMENTED(FATAL); +} + extern "C" void* artNterpAsmInstructionStart[] = { nullptr }; extern "C" void* artNterpAsmInstructionEnd[] = { nullptr }; diff --git a/runtime/interpreter/mterp/x86_64ng/array.S b/runtime/interpreter/mterp/x86_64ng/array.S new file mode 100644 index 0000000000..baf5f304b1 --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/array.S @@ -0,0 +1,151 @@ +%def op_aget(load="movl", shift="4", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET", wide="0", is_object="0"): +/* + * Array get. vAA <- vBB[vCC]. + * + * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide, aget-object + * + */ + /* op vAA, vBB, vCC */ + movzbq 2(rPC), %rax # eax <- BB + movzbq 3(rPC), %rcx # ecx <- CC + GET_VREG %edi, %rax # eax <- vBB (array object) + GET_VREG %esi, %rcx # ecx <- vCC (requested index) + testl %edi, %edi # null array object? + je common_errNullObject # bail if so + cmpl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %esi + jae common_errArrayIndex # index >= length, bail. 
+ .if $wide + movq $data_offset(%rdi,%rsi,8), %rax + SET_WIDE_VREG %rax, rINSTq + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + .elseif $is_object + testb $$READ_BARRIER_TEST_VALUE, GRAY_BYTE_OFFSET(%edi) + $load $data_offset(%rdi,%rsi,$shift), %eax + jnz 2f +1: + SET_VREG_OBJECT %eax, rINSTq + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + // reg00 is eax + call art_quick_read_barrier_mark_reg00 + jmp 1b + .else + $load $data_offset(%rdi,%rsi,$shift), %eax + SET_VREG %eax, rINSTq + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + .endif + +%def op_aget_boolean(): +% op_aget(load="movzbl", shift="1", data_offset="MIRROR_BOOLEAN_ARRAY_DATA_OFFSET", is_object="0") + +%def op_aget_byte(): +% op_aget(load="movsbl", shift="1", data_offset="MIRROR_BYTE_ARRAY_DATA_OFFSET", is_object="0") + +%def op_aget_char(): +% op_aget(load="movzwl", shift="2", data_offset="MIRROR_CHAR_ARRAY_DATA_OFFSET", is_object="0") + +%def op_aget_object(): +% op_aget(load="movl", shift="4", data_offset="MIRROR_OBJECT_ARRAY_DATA_OFFSET", is_object="1") + +%def op_aget_short(): +% op_aget(load="movswl", shift="2", data_offset="MIRROR_SHORT_ARRAY_DATA_OFFSET", is_object="0") + +%def op_aget_wide(): +% op_aget(load="movq", shift="8", data_offset="MIRROR_WIDE_ARRAY_DATA_OFFSET", wide="1", is_object="0") + +%def op_aput(rINST_reg="rINST", store="movl", shift="4", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET", wide="0"): +/* + * Array put. vBB[vCC] <- vAA. + * + * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide + * + */ + /* op vAA, vBB, vCC */ + movzbq 2(rPC), %rax # rax <- BB + movzbq 3(rPC), %rcx # rcx <- CC + GET_VREG %edi, %rax # edi <- vBB (array object) + GET_VREG %esi, %rcx # esi <- vCC (requested index) + testl %edi, %edi # null array object? + je common_errNullObject # bail if so + cmpl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %esi + jae common_errArrayIndex # index >= length, bail. + .if $wide + GET_WIDE_VREG rINSTq, rINSTq + .else + GET_VREG rINST, rINSTq + .endif + $store $rINST_reg, $data_offset(%rdi,%rsi,$shift) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_aput_boolean(): +% op_aput(rINST_reg="rINSTbl", store="movb", shift="1", data_offset="MIRROR_BOOLEAN_ARRAY_DATA_OFFSET", wide="0") + +%def op_aput_byte(): +% op_aput(rINST_reg="rINSTbl", store="movb", shift="1", data_offset="MIRROR_BYTE_ARRAY_DATA_OFFSET", wide="0") + +%def op_aput_char(): +% op_aput(rINST_reg="rINSTw", store="movw", shift="2", data_offset="MIRROR_CHAR_ARRAY_DATA_OFFSET", wide="0") + +%def op_aput_short(): +% op_aput(rINST_reg="rINSTw", store="movw", shift="2", data_offset="MIRROR_SHORT_ARRAY_DATA_OFFSET", wide="0") + +%def op_aput_wide(): +% op_aput(rINST_reg="rINSTq", store="movq", shift="8", data_offset="MIRROR_WIDE_ARRAY_DATA_OFFSET", wide="1") + +%def op_aput_object(): + movzbq 2(rPC), %rax # rax <- BB + movzbq 3(rPC), %rcx # rcx <- CC + GET_VREG %edi, %rax # edi <- vBB (array object) + GET_VREG %esi, %rcx # esi <- vCC (requested index) + testl %edi, %edi # null array object? + je common_errNullObject # bail if so + cmpl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %esi + jae common_errArrayIndex # index >= length, bail. + GET_VREG %edx, rINSTq + call art_quick_aput_obj + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_array_length(): +/* + * Return the length of an array. + */ + movl rINST, %eax # eax <- BA + sarl $$4, rINST # rINST <- B + GET_VREG %ecx, rINSTq # ecx <- vB (object ref) + testl %ecx, %ecx # is null? 
+ je common_errNullObject + andb $$0xf, %al # eax <- A + movl MIRROR_ARRAY_LENGTH_OFFSET(%rcx), rINST + SET_VREG rINST, %rax + ADVANCE_PC_FETCH_AND_GOTO_NEXT 1 + +%def op_fill_array_data(): + /* fill-array-data vAA, +BBBBBBBB */ + EXPORT_PC + movslq 2(rPC), %rcx # rcx <- ssssssssBBBBbbbb + leaq (rPC,%rcx,2), OUT_ARG0 # OUT_ARG0 <- PC + ssssssssBBBBbbbb*2 + GET_VREG OUT_32_ARG1, rINSTq # OUT_ARG1 <- vAA (array object) + call art_quick_handle_fill_data + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + +%def op_filled_new_array(helper="nterp_filled_new_array"): +/* + * Create a new array with elements filled from registers. + * + * for: filled-new-array, filled-new-array/range + */ + /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */ + /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */ + EXPORT_PC + movq rSELF:THREAD_SELF_OFFSET, OUT_ARG0 + movq (%rsp), OUT_ARG1 + movq rFP, OUT_ARG2 + movq rPC, OUT_ARG3 + call SYMBOL($helper) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + +%def op_filled_new_array_range(): +% op_filled_new_array(helper="nterp_filled_new_array_range") + +%def op_new_array(): + jmp NterpNewArray diff --git a/runtime/interpreter/mterp/x86_64ng/control_flow.S b/runtime/interpreter/mterp/x86_64ng/control_flow.S new file mode 100644 index 0000000000..35276d4047 --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/control_flow.S @@ -0,0 +1,179 @@ +%def bincmp(revcmp=""): +/* + * Generic two-operand compare-and-branch operation. Provide a "revcmp" + * fragment that specifies the *reverse* comparison to perform, e.g. + * for "if-le" you would use "gt". + * + * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le + */ + /* if-cmp vA, vB, +CCCC */ + movl rINST, %ecx # rcx <- A+ + sarl $$4, rINST # rINST <- B + andb $$0xf, %cl # rcx <- A + GET_VREG %eax, %rcx # eax <- vA + cmpl VREG_ADDRESS(rINSTq), %eax # compare (vA, vB) + j${revcmp} 1f + movswq 2(rPC), rINSTq # Get signed branch offset + BRANCH +1: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def zcmp(revcmp=""): +/* + * Generic one-operand compare-and-branch operation. Provide a "revcmp" + * fragment that specifies the *reverse* comparison to perform, e.g. + * for "if-le" you would use "gt". + * + * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez + */ + /* if-cmp vAA, +BBBB */ + cmpl $$0, VREG_ADDRESS(rINSTq) # compare (vA, 0) + j${revcmp} 1f + movswq 2(rPC), rINSTq # fetch signed displacement + BRANCH +1: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_goto(): +/* + * Unconditional branch, 8-bit offset. + * + * The branch distance is a signed code-unit offset, which we need to + * double to get a byte offset. + */ + /* goto +AA */ + movsbq rINSTbl, rINSTq # rINSTq <- ssssssAA + BRANCH + +%def op_goto_16(): +/* + * Unconditional branch, 16-bit offset. + * + * The branch distance is a signed code-unit offset, which we need to + * double to get a byte offset. + */ + /* goto/16 +AAAA */ + movswq 2(rPC), rINSTq # rINSTq <- ssssAAAA + BRANCH + +%def op_goto_32(): +/* + * Unconditional branch, 32-bit offset. + * + * The branch distance is a signed code-unit offset, which we need to + * double to get a byte offset. + * + * Because we need the SF bit set, we'll use an adds + * to convert from Dalvik offset to byte offset. 
+ */ + /* goto/32 +AAAAAAAA */ + movslq 2(rPC), rINSTq # rINSTq <- AAAAAAAA + BRANCH + +%def op_if_eq(): +% bincmp(revcmp="ne") + +%def op_if_eqz(): +% zcmp(revcmp="ne") + +%def op_if_ge(): +% bincmp(revcmp="l") + +%def op_if_gez(): +% zcmp(revcmp="l") + +%def op_if_gt(): +% bincmp(revcmp="le") + +%def op_if_gtz(): +% zcmp(revcmp="le") + +%def op_if_le(): +% bincmp(revcmp="g") + +%def op_if_lez(): +% zcmp(revcmp="g") + +%def op_if_lt(): +% bincmp(revcmp="ge") + +%def op_if_ltz(): +% zcmp(revcmp="ge") + +%def op_if_ne(): +% bincmp(revcmp="e") + +%def op_if_nez(): +% zcmp(revcmp="e") + +%def op_packed_switch(func="NterpDoPackedSwitch"): +/* + * Handle a packed-switch or sparse-switch instruction. In both cases + * we decode it and hand it off to a helper function. + * + * We don't really expect backward branches in a switch statement, but + * they're perfectly legal, so we check for them here. + * + * for: packed-switch, sparse-switch + */ + /* op vAA, +BBBB */ + movslq 2(rPC), OUT_ARG0 # rcx <- ssssssssBBBBbbbb + leaq (rPC,OUT_ARG0,2), OUT_ARG0 # rcx <- PC + ssssssssBBBBbbbb*2 + GET_VREG OUT_32_ARG1, rINSTq # eax <- vAA + call SYMBOL($func) + movslq %eax, rINSTq + BRANCH + +/* + * Return a 32-bit value. + */ +%def op_return(is_object="0"): + GET_VREG %eax, rINSTq # eax <- vAA + .if !$is_object + // In case we're going back to compiled code, put the + // result also in a xmm register. + movd %eax, %xmm0 + .endif + CFI_REMEMBER_STATE + movq -8(rREFS), %rsp + CFI_DEF_CFA(rsp, CALLEE_SAVES_SIZE) + RESTORE_ALL_CALLEE_SAVES + ret + CFI_RESTORE_STATE + +%def op_return_object(): +% op_return(is_object="1") + +%def op_return_void(): + // Thread fence for constructor is a no-op on x86_64. + CFI_REMEMBER_STATE + movq -8(rREFS), %rsp + CFI_DEF_CFA(rsp, CALLEE_SAVES_SIZE) + RESTORE_ALL_CALLEE_SAVES + ret + CFI_RESTORE_STATE + +%def op_return_void_no_barrier(): +% op_return_void() + +%def op_return_wide(): + GET_WIDE_VREG %rax, rINSTq # eax <- vAA + // In case we're going back to compiled code, put the + // result also in a xmm register. + movq %rax, %xmm0 + CFI_REMEMBER_STATE + movq -8(rREFS), %rsp + CFI_DEF_CFA(rsp, CALLEE_SAVES_SIZE) + RESTORE_ALL_CALLEE_SAVES + ret + CFI_RESTORE_STATE + +%def op_sparse_switch(): +% op_packed_switch(func="NterpDoSparseSwitch") + +%def op_throw(): + EXPORT_PC + GET_VREG %edi, rINSTq # edi<- vAA (exception object) + movq rSELF:THREAD_SELF_OFFSET, %rsi + call SYMBOL(art_quick_deliver_exception) + int3 diff --git a/runtime/interpreter/mterp/x86_64ng/invoke.S b/runtime/interpreter/mterp/x86_64ng/invoke.S new file mode 100644 index 0000000000..64d062359d --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/invoke.S @@ -0,0 +1,173 @@ +%def invoke(helper="NterpUnimplemented"): + call SYMBOL($helper) + +%def op_invoke_custom(): + EXPORT_PC + movzwl 2(rPC), %edi // call_site index, first argument of runtime call. + jmp NterpCommonInvokeCustom + +%def op_invoke_custom_range(): + EXPORT_PC + movzwl 2(rPC), %edi // call_site index, first argument of runtime call. + jmp NterpCommonInvokeCustomRange + +%def invoke_direct_or_super(helper="", range=""): + EXPORT_PC + // Fast-path which gets the method from thread-local cache. + FETCH_FROM_THREAD_CACHE %rdi, 2f +1: + // Load the first argument (the 'this' pointer). + movzwl 4(rPC), %r11d // arguments + .if !$range + andq $$0xf, %r11 + .endif + movl (rFP, %r11, 4), %esi + // NullPointerException check. 
+ movl (%esi), %eax + jmp $helper +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_method + movq %rax, %rdi + testl MACRO_LITERAL(1), %eax + je 1b + andq $$-2, %rdi // Remove the extra bit that marks it's a String.<init> method. + .if $range + jmp NterpHandleStringInitRange + .else + jmp NterpHandleStringInit + .endif + +%def op_invoke_direct(): +% invoke_direct_or_super(helper="NterpCommonInvokeInstance", range="0") + +%def op_invoke_direct_range(): +% invoke_direct_or_super(helper="NterpCommonInvokeInstanceRange", range="1") + +%def op_invoke_polymorphic(): + EXPORT_PC + // No need to fetch the target method. + // Load the first argument (the 'this' pointer). + movzwl 4(rPC), %r11d // arguments + andq $$0xf, %r11 + movl (rFP, %r11, 4), %esi + // NullPointerException check. + movl (%esi), %eax + jmp NterpCommonInvokePolymorphic + +%def op_invoke_polymorphic_range(): + EXPORT_PC + // No need to fetch the target method. + // Load the first argument (the 'this' pointer). + movzwl 4(rPC), %r11d // arguments + movl (rFP, %r11, 4), %esi + // NullPointerException check. + movl (%esi), %eax + jmp NterpCommonInvokePolymorphicRange + +%def invoke_interface(helper="", range=""): + EXPORT_PC + // Fast-path which gets the method from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + // First argument is the 'this' pointer. + movzwl 4(rPC), %r11d // arguments + .if !$range + andq $$0xf, %r11 + .endif + movl (rFP, %r11, 4), %esi + movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx + movq MIRROR_CLASS_IMT_PTR_OFFSET_64(%edx), %rdx + movq (%rdx, %rax, 8), %rdi + jmp $helper +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_method + jmp 1b + +%def op_invoke_interface(): +% invoke_interface(helper="NterpCommonInvokeInterface", range="0") + +%def op_invoke_interface_range(): +% invoke_interface(helper="NterpCommonInvokeInterfaceRange", range="1") + +%def invoke_static(helper=""): + EXPORT_PC + // Fast-path which gets the method from thread-local cache. + FETCH_FROM_THREAD_CACHE %rdi, 1f + jmp $helper +1: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_method + movq %rax, %rdi + jmp $helper + +%def op_invoke_static(): +% invoke_static(helper="NterpCommonInvokeStatic") + +%def op_invoke_static_range(): +% invoke_static(helper="NterpCommonInvokeStaticRange") + +%def op_invoke_super(): +% invoke_direct_or_super(helper="NterpCommonInvokeInstance", range="0") + +%def op_invoke_super_range(): +% invoke_direct_or_super(helper="NterpCommonInvokeInstanceRange", range="1") + +%def invoke_virtual(helper="", range=""): + EXPORT_PC + // Fast-path which gets the method from thread-local cache. + FETCH_FROM_THREAD_CACHE %rdi, 2f +1: + // First argument is the 'this' pointer. + movzwl 4(rPC), %r11d // arguments + .if !$range + andq $$0xf, %r11 + .endif + movl (rFP, %r11, 4), %esi + // Note: if esi is null, this will be handled by our SIGSEGV handler. + movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx + movq MIRROR_CLASS_VTABLE_OFFSET_64(%edx, %edi, 8), %rdi + jmp $helper +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_method + movl %eax, %edi + jmp 1b + +%def op_invoke_virtual(): +% invoke_virtual(helper="NterpCommonInvokeInstance", range="0") + +%def op_invoke_virtual_quick(): + EXPORT_PC + movzwl 2(rPC), %eax // offset + // First argument is the 'this' pointer. 
+ movzwl 4(rPC), %r11d // arguments + andq $$0xf, %r11 + movl (rFP, %r11, 4), %esi + // Note: if esi is null, this will be handled by our SIGSEGV handler. + movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx + movq MIRROR_CLASS_VTABLE_OFFSET_64(%edx, %eax, 8), %rdi + jmp NterpCommonInvokeInstance + +%def op_invoke_virtual_range(): +% invoke_virtual(helper="NterpCommonInvokeInstanceRange", range="1") + +%def op_invoke_virtual_range_quick(): + EXPORT_PC + movzwl 2(rPC), %eax // offset + // First argument is the 'this' pointer. + movzwl 4(rPC), %r11d // arguments + movl (rFP, %r11, 4), %esi + // Note: if esi is null, this will be handled by our SIGSEGV handler. + movl MIRROR_OBJECT_CLASS_OFFSET(%esi), %edx + movq MIRROR_CLASS_VTABLE_OFFSET_64(%edx, %eax, 8), %rdi + jmp NterpCommonInvokeInstanceRange diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S new file mode 100644 index 0000000000..4d97bcde7e --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/main.S @@ -0,0 +1,2030 @@ +%def header(): +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This is a #include, not a %include, because we want the C pre-processor + * to expand the macros into assembler assignment statements. + */ +#include "asm_support.h" +#include "arch/x86_64/asm_support_x86_64.S" +#include "interpreter/cfi_asm_support.h" + +/** + * x86_64 ABI general notes: + * + * Caller save set: + * rax, rdx, rcx, rsi, rdi, r8-r11, st(0)-st(7) + * Callee save set: + * rbx, rbp, r12-r15 + * Return regs: + * 32-bit in eax + * 64-bit in rax + * fp on xmm0 + * + * First 8 fp parameters came in xmm0-xmm7. + * First 6 non-fp parameters came in rdi, rsi, rdx, rcx, r8, r9. + * Other parameters passed on stack, pushed right-to-left. On entry to target, first + * param is at 8(%esp). + * + * Stack must be 16-byte aligned to support SSE in native code. + */ + +#define IN_ARG3 %rcx +#define IN_ARG2 %rdx +#define IN_ARG1 %rsi +#define IN_ARG0 %rdi +/* Out Args */ +#define OUT_ARG3 %rcx +#define OUT_ARG2 %rdx +#define OUT_ARG1 %rsi +#define OUT_ARG0 %rdi +#define OUT_32_ARG3 %ecx +#define OUT_32_ARG2 %edx +#define OUT_32_ARG1 %esi +#define OUT_32_ARG0 %edi +#define OUT_FP_ARG1 %xmm1 +#define OUT_FP_ARG0 %xmm0 + +/* + * single-purpose registers, given names for clarity + */ +#define rSELF %gs +#define rPC %r12 +#define CFI_DEX 12 // DWARF register number of the register holding dex-pc (rPC). +#define CFI_TMP 5 // DWARF register number of the first argument register (rdi). +#define rFP %r13 +#define rINST %ebx +#define rINSTq %rbx +#define rINSTw %bx +#define rINSTbh %bh +#define rINSTbl %bl +#define rIBASE %r14 +#define rREFS %r15 +#define CFI_REFS 15 // DWARF register number of the reference array (r15). + +// Temporary registers while setting up a frame. +#define rNEW_FP %r8 +#define rNEW_REFS %r9 +#define CFI_NEW_REFS 9 + +/* + * Get/set the 32-bit value from a Dalvik register. 
+ */ +#define VREG_ADDRESS(_vreg) (rFP,_vreg,4) +#define VREG_HIGH_ADDRESS(_vreg) 4(rFP,_vreg,4) +#define VREG_REF_ADDRESS(_vreg) (rREFS,_vreg,4) +#define VREG_REF_HIGH_ADDRESS(_vreg) 4(rREFS,_vreg,4) + +// Includes the return address implictly pushed on stack by 'call'. +#define CALLEE_SAVES_SIZE (6 * 8 + 4 * 8 + 1 * 8) + +// +8 for the ArtMethod of the caller. +#define OFFSET_TO_FIRST_ARGUMENT_IN_STACK (CALLEE_SAVES_SIZE + 8) + +/* + * Refresh rINST. + * At enter to handler rINST does not contain the opcode number. + * However some utilities require the full value, so this macro + * restores the opcode number. + */ +.macro REFRESH_INST _opnum + movb rINSTbl, rINSTbh + movb $$\_opnum, rINSTbl +.endm + +/* + * Fetch the next instruction from rPC into rINSTw. Does not advance rPC. + */ +.macro FETCH_INST + movzwq (rPC), rINSTq +.endm + +/* + * Remove opcode from rINST, compute the address of handler and jump to it. + */ +.macro GOTO_NEXT + movzx rINSTbl,%ecx + movzbl rINSTbh,rINST + shll MACRO_LITERAL(${handler_size_bits}), %ecx + addq rIBASE, %rcx + jmp *%rcx +.endm + +/* + * Advance rPC by instruction count. + */ +.macro ADVANCE_PC _count + leaq 2*\_count(rPC), rPC +.endm + +/* + * Advance rPC by instruction count, fetch instruction and jump to handler. + */ +.macro ADVANCE_PC_FETCH_AND_GOTO_NEXT _count + ADVANCE_PC \_count + FETCH_INST + GOTO_NEXT +.endm + +.macro GET_VREG _reg _vreg + movl VREG_ADDRESS(\_vreg), \_reg +.endm + +.macro GET_VREG_OBJECT _reg _vreg + movl VREG_REF_ADDRESS(\_vreg), \_reg +.endm + +/* Read wide value. */ +.macro GET_WIDE_VREG _reg _vreg + movq VREG_ADDRESS(\_vreg), \_reg +.endm + +.macro SET_VREG _reg _vreg + movl \_reg, VREG_ADDRESS(\_vreg) + movl MACRO_LITERAL(0), VREG_REF_ADDRESS(\_vreg) +.endm + +/* Write wide value. reg is clobbered. */ +.macro SET_WIDE_VREG _reg _vreg + movq \_reg, VREG_ADDRESS(\_vreg) + xorq \_reg, \_reg + movq \_reg, VREG_REF_ADDRESS(\_vreg) +.endm + +.macro SET_VREG_OBJECT _reg _vreg + movl \_reg, VREG_ADDRESS(\_vreg) + movl \_reg, VREG_REF_ADDRESS(\_vreg) +.endm + +.macro GET_VREG_HIGH _reg _vreg + movl VREG_HIGH_ADDRESS(\_vreg), \_reg +.endm + +.macro SET_VREG_HIGH _reg _vreg + movl \_reg, VREG_HIGH_ADDRESS(\_vreg) + movl MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg) +.endm + +.macro CLEAR_REF _vreg + movl MACRO_LITERAL(0), VREG_REF_ADDRESS(\_vreg) +.endm + +.macro CLEAR_WIDE_REF _vreg + movl MACRO_LITERAL(0), VREG_REF_ADDRESS(\_vreg) + movl MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg) +.endm + +.macro GET_VREG_XMMs _xmmreg _vreg + movss VREG_ADDRESS(\_vreg), \_xmmreg +.endm +.macro GET_VREG_XMMd _xmmreg _vreg + movsd VREG_ADDRESS(\_vreg), \_xmmreg +.endm +.macro SET_VREG_XMMs _xmmreg _vreg + movss \_xmmreg, VREG_ADDRESS(\_vreg) +.endm +.macro SET_VREG_XMMd _xmmreg _vreg + movsd \_xmmreg, VREG_ADDRESS(\_vreg) +.endm + +// An assembly entry that has a OatQuickMethodHeader prefix. +.macro OAT_ENTRY name, end + FUNCTION_TYPE(\name) + ASM_HIDDEN SYMBOL(\name) + .global SYMBOL(\name) + .balign 16 + .long 0 + .long (SYMBOL(\end) - SYMBOL(\name)) +SYMBOL(\name): +.endm + +.macro ENTRY name + .text + ASM_HIDDEN SYMBOL(\name) + .global SYMBOL(\name) + FUNCTION_TYPE(\name) +SYMBOL(\name): +.endm + +.macro END name + SIZE(\name) +.endm + +// Macro for defining entrypoints into runtime. We don't need to save registers +// (we're not holding references there), but there is no +// kDontSave runtime method. So just use the kSaveRefsOnly runtime method. 
+.macro NTERP_TRAMPOLINE name, helper +DEFINE_FUNCTION \name + SETUP_SAVE_REFS_ONLY_FRAME + call \helper + RESTORE_SAVE_REFS_ONLY_FRAME + RETURN_OR_DELIVER_PENDING_EXCEPTION +END_FUNCTION nterp_get_static_field +.endm + +.macro CLEAR_VOLATILE_MARKER reg + andq MACRO_LITERAL(-2), \reg +.endm + +.macro EXPORT_PC + movq rPC, -16(rREFS) +.endm + + +.macro BRANCH + // Update method counter and do a suspend check if the branch is negative. + testq rINSTq, rINSTq + js 3f +2: + leaq (rPC, rINSTq, 2), rPC + FETCH_INST + GOTO_NEXT +3: + movq (%rsp), %rdi + addw $$1, ART_METHOD_HOTNESS_COUNT_OFFSET(%rdi) + // If the counter overflows, handle this in the runtime. + jo NterpHandleHotnessOverflow + // Otherwise, do a suspend check. + testl $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), rSELF:THREAD_FLAGS_OFFSET + jz 2b + EXPORT_PC + call SYMBOL(art_quick_test_suspend) + jmp 2b +.endm + +// Puts the next floating point argument into the expected register, +// fetching values based on a non-range invoke. +// Uses rax as temporary. +// +// TODO: We could simplify a lot of code by loading the G argument into +// the "inst" register. Given that we enter the handler with "1(rPC)" in +// the rINST, we can just add rINST<<16 to the args and we don't even +// need to pass "arg_index" around. +.macro LOOP_OVER_SHORTY_LOADING_XMMS xmm_reg, inst, shorty, arg_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + je 3f + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + // Handle extra argument in arg array taken by a long. + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP + jne 1b + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 1b // goto LOOP +2: // FOUND_DOUBLE + subq MACRO_LITERAL(8), %rsp + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + GET_VREG %eax, %rax + movl %eax, (%rsp) + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + cmpq MACRO_LITERAL(4), REG_VAR(arg_index) + je 5f + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 6f +5: + movzbl 1(rPC), %eax + andq MACRO_LITERAL(0xf), %rax +6: + GET_VREG %eax, %rax + movl %eax, 4(%rsp) + movsd (%rsp), REG_VAR(xmm_reg) + addq MACRO_LITERAL(8), %rsp + jmp 4f +3: // FOUND_FLOAT + cmpq MACRO_LITERAL(4), REG_VAR(arg_index) + je 7f + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 8f +7: + movzbl 1(rPC), %eax + andq MACRO_LITERAL(0xf), %rax +8: + GET_VREG_XMMs REG_VAR(xmm_reg), %rax +4: +.endm + +// Puts the next int/long/object argument in the expected register, +// fetching values based on a non-range invoke. +// Uses rax as temporary. 
+.macro LOOP_OVER_SHORTY_LOADING_GPRS gpr_reg64, gpr_reg32, inst, shorty, arg_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + je 3f + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + je 4f + cmpq MACRO_LITERAL(4), REG_VAR(arg_index) + je 7f + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 8f +7: + movzbl 1(rPC), %eax + andq MACRO_LITERAL(0xf), %rax +8: + GET_VREG REG_VAR(gpr_reg32), %rax + jmp 5f +2: // FOUND_LONG + subq MACRO_LITERAL(8), %rsp + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + GET_VREG %eax, %rax + movl %eax, (%rsp) + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + cmpq MACRO_LITERAL(4), REG_VAR(arg_index) + je 9f + movq REG_VAR(inst), %rax + andq MACRO_LITERAL(0xf), %rax + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 10f +9: + movzbl 1(rPC), %eax + andq MACRO_LITERAL(0xf), %rax +10: + GET_VREG %eax, %rax + movl %eax, 4(%rsp) + movq (%rsp), REG_VAR(gpr_reg64) + addq MACRO_LITERAL(8), %rsp + jmp 5f +3: // SKIP_FLOAT + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 1b +4: // SKIP_DOUBLE + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + cmpq MACRO_LITERAL(4), REG_VAR(arg_index) + je 1b + shrq MACRO_LITERAL(4), REG_VAR(inst) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + jmp 1b +5: +.endm + +// Puts the next floating point argument into the expected register, +// fetching values based on a range invoke. +// Uses rax as temporary. +.macro LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm_reg, shorty, arg_index, stack_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + je 3f + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + // Handle extra argument in arg array taken by a long. + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP + jne 1b + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b // goto LOOP +2: // FOUND_DOUBLE + GET_VREG_XMMd REG_VAR(xmm_reg), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 4f +3: // FOUND_FLOAT + GET_VREG_XMMs REG_VAR(xmm_reg), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) +4: +.endm + +// Puts the next floating point argument into the expected stack slot, +// fetching values based on a range invoke. +// Uses rax as temporary. +// +// TODO: We could just copy all the vregs to the stack slots in a simple loop +// (or REP MOVSD) without looking at the shorty at all. (We could also drop +// the "stack_index" from the macros for loading registers.) We could also do +// that conditionally if argument word count > 6; otherwise we know that all +// args fit into registers. 
+.macro LOOP_RANGE_OVER_FPs shorty, arg_index, stack_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + je 3f + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + // Handle extra argument in arg array taken by a long. + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP + jne 1b + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b // goto LOOP +2: // FOUND_DOUBLE + movq (rFP, REG_VAR(arg_index), 4), %rax + movq %rax, 8(%rsp, REG_VAR(stack_index), 4) + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 1b +3: // FOUND_FLOAT + movl (rFP, REG_VAR(arg_index), 4), %eax + movl %eax, 8(%rsp, REG_VAR(stack_index), 4) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b +.endm + +// Puts the next int/long/object argument in the expected register, +// fetching values based on a range invoke. +// Uses rax as temporary. +.macro LOOP_RANGE_OVER_SHORTY_LOADING_GPRS gpr_reg64, gpr_reg32, shorty, arg_index, stack_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + je 3f + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + je 4f + movl (rFP, REG_VAR(arg_index), 4), REG_VAR(gpr_reg32) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 5f +2: // FOUND_LONG + movq (rFP, REG_VAR(arg_index), 4), REG_VAR(gpr_reg64) + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 5f +3: // SKIP_FLOAT + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b +4: // SKIP_DOUBLE + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 1b +5: +.endm + +// Puts the next int/long/object argument in the expected stack slot, +// fetching values based on a range invoke. +// Uses rax as temporary. 
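Editor's note: as with the FP loop above, the loop below advances arg_index/stack_index by two for 'J' and 'D' because wide values occupy two 32-bit dex registers. Sizing the outs area from a shorty would look roughly like this in C++ (a sketch with invented names, not ART's own accounting):

#include <cstddef>
#include <cstdio>

size_t CountOutVRegs(const char* shorty, bool is_static) {
  size_t vregs = is_static ? 0 : 1;              // implicit 'this' for instance methods
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    vregs += (*p == 'J' || *p == 'D') ? 2 : 1;   // wide arguments take two slots
  }
  return vregs;
}

int main() {
  // void m(long, Object, float) on an instance: 1 + 2 + 1 + 1 = 5 vregs.
  std::printf("%zu\n", CountOutVRegs("VJLF", /*is_static=*/false));
}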
+.macro LOOP_RANGE_OVER_INTs shorty, arg_index, stack_index, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // al := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + je 3f + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + je 4f + movl (rFP, REG_VAR(arg_index), 4), %eax + movl %eax, 8(%rsp, REG_VAR(stack_index), 4) + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b +2: // FOUND_LONG + movq (rFP, REG_VAR(arg_index), 4), %rax + movq %rax, 8(%rsp, REG_VAR(stack_index), 4) + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 1b +3: // SKIP_FLOAT + addq MACRO_LITERAL(1), REG_VAR(arg_index) + addq MACRO_LITERAL(1), REG_VAR(stack_index) + jmp 1b +4: // SKIP_DOUBLE + addq MACRO_LITERAL(2), REG_VAR(arg_index) + addq MACRO_LITERAL(2), REG_VAR(stack_index) + jmp 1b +.endm + +// Puts the next floating point parameter passed in physical register +// in the expected dex register array entry. +// Uses rax as temporary. +.macro LOOP_OVER_SHORTY_STORING_XMMS xmm_reg, shorty, arg_index, fp, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // al := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + je 3f + addq MACRO_LITERAL(4), REG_VAR(arg_index) + // Handle extra argument in arg array taken by a long. + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP + jne 1b + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b // goto LOOP +2: // FOUND_DOUBLE + movsd REG_VAR(xmm_reg),(REG_VAR(fp), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 4f +3: // FOUND_FLOAT + movss REG_VAR(xmm_reg), (REG_VAR(fp), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(4), REG_VAR(arg_index) +4: +.endm + +// Puts the next int/long/object parameter passed in physical register +// in the expected dex register array entry, and in case of object in the +// expected reference array entry. +// Uses rax as temporary. 
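Editor's note: the GPR-storing loop below also mirrors object parameters into a second array. The idea, sketched with a stand-in type (this is not ART's ShadowFrame, and the exact clearing behaviour is an assumption): every value goes into the dex-register array, and references are duplicated into a parallel reference array so the GC can visit them without decoding bytecode.

#include <cstdint>
#include <vector>

struct NterpLikeFrame {
  std::vector<uint32_t> regs;  // all vregs, including the raw bits of references
  std::vector<uint32_t> refs;  // assumed: non-zero only where the vreg holds a reference

  explicit NterpLikeFrame(size_t n) : regs(n, 0u), refs(n, 0u) {}
  void SetVReg(size_t i, uint32_t value)        { regs[i] = value; refs[i] = 0u; }
  void SetVRegReference(size_t i, uint32_t ref) { regs[i] = ref;   refs[i] = ref; }
};

int main() {
  NterpLikeFrame frame(4);
  frame.SetVReg(0, 42);               // primitive: only the register array sees it
  frame.SetVRegReference(1, 0x1000);  // object: visible in both arrays
}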
+.macro LOOP_OVER_SHORTY_STORING_GPRS gpr_reg64, gpr_reg32, shorty, arg_index, regs, refs, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + je 3f + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + je 4f + movl REG_VAR(gpr_reg32), (REG_VAR(regs), REG_VAR(arg_index), 1) + cmpb MACRO_LITERAL(76), %al // if (al != 'L') goto NOT_REFERENCE + jne 6f + movl REG_VAR(gpr_reg32), (REG_VAR(refs), REG_VAR(arg_index), 1) +6: // NOT_REFERENCE + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 5f +2: // FOUND_LONG + movq REG_VAR(gpr_reg64), (REG_VAR(regs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 5f +3: // SKIP_FLOAT + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b +4: // SKIP_DOUBLE + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 1b +5: +.endm + +// Puts the next floating point parameter passed in stack +// in the expected dex register array entry. +// Uses rax as temporary. +// +// TODO: Or we could just spill regs to the reserved slots in the caller's +// frame and copy all regs in a simple loop. This time, however, we would +// need to look at the shorty anyway to look for the references. +// (The trade-off is different for passing arguments and receiving them.) +.macro LOOP_OVER_FPs shorty, arg_index, regs, stack_ptr, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE + je 2f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT + je 3f + addq MACRO_LITERAL(4), REG_VAR(arg_index) + // Handle extra argument in arg array taken by a long. + cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP + jne 1b + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b // goto LOOP +2: // FOUND_DOUBLE + movq OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_index), 1), %rax + movq %rax, (REG_VAR(regs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 1b +3: // FOUND_FLOAT + movl OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_index), 1), %eax + movl %eax, (REG_VAR(regs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b +.endm + +// Puts the next int/long/object parameter passed in stack +// in the expected dex register array entry, and in case of object in the +// expected reference array entry. +// Uses rax as temporary. 
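Editor's note: the stack loops below only ever see the arguments that did not fit in registers. A back-of-the-envelope sketch of that split (the budget of five integer registers — RDI being taken by the ArtMethod* — and eight XMMs is read off the loops in this file; everything else here is invented for illustration):

#include <cstdio>

struct ArgSplit { int in_gprs = 0, in_xmms = 0, on_stack = 0; };

ArgSplit SplitArgs(const char* shorty, bool is_static) {
  constexpr int kMaxGprs = 5, kMaxXmms = 8;
  ArgSplit s;
  if (!is_static) s.in_gprs = 1;  // 'this' takes the first integer register slot
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    if (*p == 'F' || *p == 'D') {
      (s.in_xmms < kMaxXmms ? s.in_xmms : s.on_stack)++;
    } else {
      (s.in_gprs < kMaxGprs ? s.in_gprs : s.on_stack)++;
    }
  }
  return s;
}

int main() {
  // Seven int arguments on a static method: five in registers, two on the stack.
  ArgSplit s = SplitArgs("VIIIIIII", /*is_static=*/true);
  std::printf("gprs=%d xmms=%d stack=%d\n", s.in_gprs, s.in_xmms, s.on_stack);
}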
+.macro LOOP_OVER_INTs shorty, arg_index, regs, refs, stack_ptr, finished +1: // LOOP + movb (REG_VAR(shorty)), %al // bl := *shorty + addq MACRO_LITERAL(1), REG_VAR(shorty) // shorty++ + cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto finished + je VAR(finished) + cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG + je 2f + cmpb MACRO_LITERAL(76), %al // if (al == 'L') goto FOUND_REFERENCE + je 6f + cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT + je 3f + cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE + je 4f + movl OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_index), 1), %eax + movl %eax, (REG_VAR(regs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b +6: // FOUND_REFERENCE + movl OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_index), 1), %eax + movl %eax, (REG_VAR(regs), REG_VAR(arg_index), 1) + movl %eax, (REG_VAR(refs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b +2: // FOUND_LONG + movq OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_index), 1), %rax + movq %rax, (REG_VAR(regs), REG_VAR(arg_index), 1) + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 1b +3: // SKIP_FLOAT + addq MACRO_LITERAL(4), REG_VAR(arg_index) + jmp 1b +4: // SKIP_DOUBLE + addq MACRO_LITERAL(8), REG_VAR(arg_index) + jmp 1b +.endm + +// Increase method hotness and do suspend check before starting executing the method. +.macro START_EXECUTING_INSTRUCTIONS + movq (%rsp), %rdi + addw $$1, ART_METHOD_HOTNESS_COUNT_OFFSET(%rdi) + jo 2f + testl $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), rSELF:THREAD_FLAGS_OFFSET + jz 1f + EXPORT_PC + call SYMBOL(art_quick_test_suspend) +1: + FETCH_INST + GOTO_NEXT +2: + movq $$0, %rsi + movq rFP, %rdx + call nterp_hot_method + jmp 1b +.endm + +.macro SPILL_ALL_CALLEE_SAVES + PUSH r15 + PUSH r14 + PUSH r13 + PUSH r12 + PUSH rbp + PUSH rbx + SETUP_FP_CALLEE_SAVE_FRAME +.endm + +.macro RESTORE_ALL_CALLEE_SAVES + RESTORE_FP_CALLEE_SAVE_FRAME + POP rbx + POP rbp + POP r12 + POP r13 + POP r14 + POP r15 +.endm + +// Helper to setup the stack after doing a nterp to nterp call. This will setup: +// - rNEW_FP: the new pointer to dex registers +// - rNEW_REFS: the new pointer to references +// - rPC: the new PC pointer to execute +// - edi: number of arguments +// - ecx: first dex register +.macro SETUP_STACK_FOR_INVOKE + // We do the same stack overflow check as the compiler. See CanMethodUseNterp + // in how we limit the maximum nterp frame size. + testq %rax, -STACK_OVERFLOW_RESERVED_BYTES(%rsp) + + // Spill all callee saves to have a consistent stack frame whether we + // are called by compiled code or nterp. + SPILL_ALL_CALLEE_SAVES + + movq %rsp, %r11 + CFI_DEF_CFA_REGISTER(r11) + + // From this point: + // - rax contains code item + // - rdi contains method + // - r11 contains saved stack pointer. + + // Create space for registers * 2. Set rFP and rRefs. + movzwl CODE_ITEM_REGISTERS_SIZE_OFFSET(%rax), %ecx + sall MACRO_LITERAL(2), %ecx + subq %rcx, %rsp + movq %rsp, rNEW_FP + subq %rcx, %rsp + movq %rsp, rNEW_REFS + + // Put nulls in reference frame. + testl %ecx, %ecx + je 2f + movq rNEW_REFS, %rcx +1: + movl MACRO_LITERAL(0), (%rcx) + addq MACRO_LITERAL(4), %rcx + cmpq %rcx, rNEW_FP + jne 1b +2: + // Create space for the previous frame, saved dex pc, and method being called + subq MACRO_LITERAL(24), %rsp + + // TODO: We could get rid of the two lines below if we preserve r11 until we copy + // rNEW_REFS to rREFS. 
(We currently do because we use it for copying parameters. + // We should move the alignment and rewrite the parameter copy so that we do not + // need r11 for that and still preserve r11.) + // + // Save the previous frame. + movq %r11, -8(rNEW_REFS) + CFI_DEFINE_CFA_DEREF(CFI_NEW_REFS, -8, (6 + 4 + 1) * 8) + + // Take space for outs. + movzwl CODE_ITEM_OUTS_SIZE_OFFSET(%rax), %ecx + sall MACRO_LITERAL(2), %ecx + subq %rcx, %rsp + + // Align stack pointer to 16. + andq MACRO_LITERAL(-16), %rsp + + // Save the ArtMethod. + movq %rdi, (%rsp) + + // Fetch instruction information before replacing rPC. + movzbl 1(rPC), %edi + movzwl 4(rPC), %ecx + + // Set the dex pc pointer. + leaq CODE_ITEM_INSNS_OFFSET(%rax), rPC + CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0) +.endm + +// Setup arguments based on a non-range nterp to nterp call, and start executing +// the method. We expect: +// - rNEW_FP: the new pointer to dex registers +// - rNEW_REFS: the new pointer to references +// - rPC: the new PC pointer to execute +// - edi: number of arguments +// - ecx: first dex register +// - r11: top of dex register array +// - esi: receiver if non-static. +.macro SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0 + // Now all temporary registers (except r11 containing top of registers array) + // are available, copy the parameters. + // /* op vA, vB, {vC...vG} */ + movl %edi, %eax + shrl $$4, %eax # Number of arguments + jz 6f # shl sets the Z flag + movq MACRO_LITERAL(-1), %r10 + cmpl MACRO_LITERAL(2), %eax + jl 1f + je 2f + cmpl MACRO_LITERAL(4), %eax + jl 3f + je 4f + + // We use a decrementing r10 to store references relative + // to rNEW_FP and dex registers relative to r11. + // + // TODO: We could set up r10 as the number of registers (this can be an additional output from + // SETUP_STACK_FOR_INVOKE) and then just decrement it by one before copying each arg to + // (rNEW_FP, r10, 4) and (rNEW_REFS, r10, 4). + // Maybe even introduce macros NEW_VREG_ADDRESS/NEW_VREG_REF_ADDRESS. +5: + andq MACRO_LITERAL(15), %rdi + GET_VREG_OBJECT %edx, %rdi + movl %edx, (rNEW_FP, %r10, 4) + GET_VREG %edx, %rdi + movl %edx, (%r11, %r10, 4) + subq MACRO_LITERAL(1), %r10 +4: + movl %ecx, %eax + shrl MACRO_LITERAL(12), %eax + GET_VREG_OBJECT %edx, %rax + movl %edx, (rNEW_FP, %r10, 4) + GET_VREG %edx, %rax + movl %edx, (%r11, %r10, 4) + subq MACRO_LITERAL(1), %r10 +3: + movl %ecx, %eax + shrl MACRO_LITERAL(8), %eax + andl MACRO_LITERAL(0xf), %eax + GET_VREG_OBJECT %edx, %rax + movl %edx, (rNEW_FP, %r10, 4) + GET_VREG %edx, %rax + movl %edx, (%r11, %r10, 4) + subq MACRO_LITERAL(1), %r10 +2: + movl %ecx, %eax + shrl MACRO_LITERAL(4), %eax + andl MACRO_LITERAL(0xf), %eax + GET_VREG_OBJECT %edx, %rax + movl %edx, (rNEW_FP, %r10, 4) + GET_VREG %edx, %rax + movl %edx, (%r11, %r10, 4) + subq MACRO_LITERAL(1), %r10 +1: + .if \is_string_init + // Ignore the first argument + .elseif \is_static + movl %ecx, %eax + andq MACRO_LITERAL(0x000f), %rax + GET_VREG_OBJECT %edx, %rax + movl %edx, (rNEW_FP, %r10, 4) + GET_VREG %edx, %rax + movl %edx, (%r11, %r10, 4) + .else + movl %esi, (rNEW_FP, %r10, 4) + movl %esi, (%r11, %r10, 4) + .endif + +6: + // Start executing the method. + movq rNEW_FP, rFP + movq rNEW_REFS, rREFS + CFI_DEFINE_CFA_DEREF(CFI_REFS, -8, (6 + 4 + 1) * 8) + START_EXECUTING_INSTRUCTIONS +.endm + +// Setup arguments based on a range nterp to nterp call, and start executing +// the method. 
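Editor's note: both the range and non-range setup paths run inside the frame carved out by SETUP_STACK_FOR_INVOKE above. Its size is roughly the following arithmetic (an approximation for illustration only; it ignores the callee-save spill area, and the authoritative computation is NterpGetFrameSize() in nterp_helpers.cc, exported later in this change):

#include <cstddef>
#include <cstdio>

constexpr size_t RoundUp(size_t x, size_t align) { return (x + align - 1) & ~(align - 1); }

size_t ApproxNterpFrameSize(size_t num_regs, size_t out_regs) {
  size_t size = num_regs * 4      // dex register array
              + num_regs * 4      // parallel reference array
              + 24                // previous frame pointer, saved dex pc, ArtMethod*
              + out_regs * 4;     // outgoing arguments
  return RoundUp(size, 16);       // the stack pointer is kept 16-byte aligned
}

int main() {
  std::printf("%zu\n", ApproxNterpFrameSize(10, 4));  // 40 + 40 + 24 + 16 -> 128
}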
+.macro SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0 + // edi is number of arguments + // ecx is first register + movq MACRO_LITERAL(-4), %r10 + .if \is_string_init + // Ignore the first argument + subl $$1, %edi + addl $$1, %ecx + .elseif !\is_static + subl $$1, %edi + addl $$1, %ecx + .endif + + testl %edi, %edi + je 2f + leaq (rREFS, %rcx, 4), %rax # pointer to first argument in reference array + leaq (%rax, %rdi, 4), %rax # pointer to last argument in reference array + leaq (rFP, %rcx, 4), %rcx # pointer to first argument in register array + leaq (%rcx, %rdi, 4), %rdi # pointer to last argument in register array + // TODO: Same comment for copying arguments as in SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE. +1: + movl -4(%rax), %edx + movl %edx, (rNEW_FP, %r10, 1) + movl -4(%rdi), %edx + movl %edx, (%r11, %r10, 1) + subq MACRO_LITERAL(4), %r10 + subq MACRO_LITERAL(4), %rax + subq MACRO_LITERAL(4), %rdi + cmpq %rcx, %rdi + jne 1b + +2: + .if \is_string_init + // Ignore first argument + .elseif !\is_static + movl %esi, (rNEW_FP, %r10, 1) + movl %esi, (%r11, %r10, 1) + .endif + movq rNEW_FP, rFP + movq rNEW_REFS, rREFS + CFI_DEFINE_CFA_DEREF(CFI_REFS, -8, (6 + 4 + 1) * 8) + START_EXECUTING_INSTRUCTIONS +.endm + +.macro GET_SHORTY dest, is_interface, is_polymorphic, is_custom + push %rdi + push %rsi + .if \is_polymorphic + movq 16(%rsp), %rdi + movq rPC, %rsi + call SYMBOL(NterpGetShortyFromInvokePolymorphic) + .elseif \is_custom + movq 16(%rsp), %rdi + movq rPC, %rsi + call SYMBOL(NterpGetShortyFromInvokeCustom) + .elseif \is_interface + movq 16(%rsp), %rdi + movzwl 2(rPC), %esi + call SYMBOL(NterpGetShortyFromMethodId) + .else + call SYMBOL(NterpGetShorty) + .endif + pop %rsi + pop %rdi + movq %rax, \dest +.endm + +.macro DO_ENTRY_POINT_CHECK call_compiled_code + // On entry, the method is %rdi, the instance is %rsi + leaq ExecuteNterpImpl(%rip), %rax + cmpq %rax, ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) + jne VAR(call_compiled_code) + + // TODO: Get code item in a better way and remove below + push %rdi + push %rsi + call SYMBOL(NterpGetCodeItem) + pop %rsi + pop %rdi + // TODO: Get code item in a better way and remove above +.endm + +// Uses r9 and r10 as temporary +.macro UPDATE_REGISTERS_FOR_STRING_INIT old_value, new_value + movq rREFS, %r9 + movq rFP, %r10 +1: + cmpl (%r9), \old_value + jne 2f + movl \new_value, (%r9) + movl \new_value, (%r10) +2: + addq $$4, %r9 + addq $$4, %r10 + cmpq %r9, rFP + jne 1b +.endm + +.macro COMMON_INVOKE_NON_RANGE is_static=0, is_interface=0, suffix="", is_string_init=0, is_polymorphic=0, is_custom=0 + .if \is_polymorphic + // We always go to compiled code for polymorphic calls. + .elseif \is_custom + // We always go to compiled code for custom calls. + .else + DO_ENTRY_POINT_CHECK .Lcall_compiled_code_\suffix + .if \is_string_init + call nterp_to_nterp_string_init_non_range + .elseif \is_static + call nterp_to_nterp_static_non_range + .else + call nterp_to_nterp_instance_non_range + .endif + jmp .Ldone_return_\suffix + .endif + +.Lcall_compiled_code_\suffix: + GET_SHORTY rINSTq, \is_interface, \is_polymorphic, \is_custom + // From this point: + // - rISNTq contains shorty (in callee-save to switch over return value after call). + // - rdi contains method + // - rsi contains 'this' pointer for instance method. 
+ leaq 1(rINSTq), %r9 // shorty + 1 ; ie skip return arg character + movzwl 4(rPC), %r11d // arguments + .if \is_string_init + shrq MACRO_LITERAL(4), %r11 + movq $$1, %r10 // ignore first argument + .elseif \is_static + movq $$0, %r10 // arg_index + .else + shrq MACRO_LITERAL(4), %r11 + movq $$1, %r10 // arg_index + .endif + LOOP_OVER_SHORTY_LOADING_XMMS xmm0, r11, r9, r10, .Lxmm_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_XMMS xmm1, r11, r9, r10, .Lxmm_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_XMMS xmm2, r11, r9, r10, .Lxmm_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_XMMS xmm3, r11, r9, r10, .Lxmm_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_XMMS xmm4, r11, r9, r10, .Lxmm_setup_finished_\suffix +.Lxmm_setup_finished_\suffix: + leaq 1(rINSTq), %r9 // shorty + 1 ; ie skip return arg character + movzwl 4(rPC), %r11d // arguments + .if \is_string_init + movq $$1, %r10 // ignore first argument + shrq MACRO_LITERAL(4), %r11 + LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, r11, r9, r10, .Lgpr_setup_finished_\suffix + .elseif \is_static + movq $$0, %r10 // arg_index + LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, r11, r9, r10, .Lgpr_setup_finished_\suffix + .else + shrq MACRO_LITERAL(4), %r11 + movq $$1, %r10 // arg_index + .endif + LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, r11, r9, r10, .Lgpr_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, r11, r9, r10, .Lgpr_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, r11, r9, r10, .Lgpr_setup_finished_\suffix + LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, r11, r9, r10, .Lgpr_setup_finished_\suffix +.Lgpr_setup_finished_\suffix: + .if \is_polymorphic + call SYMBOL(art_quick_invoke_polymorphic) + .elseif \is_custom + call SYMBOL(art_quick_invoke_custom) + .else + .if \is_interface + movzwl 2(rPC), %eax + .endif + call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method. + .endif + cmpb LITERAL(68), (rINSTq) // Test if result type char == 'D'. + je .Lreturn_double_\suffix + cmpb LITERAL(70), (rINSTq) // Test if result type char == 'F'. + jne .Ldone_return_\suffix +.Lreturn_float_\suffix: + movd %xmm0, %eax + jmp .Ldone_return_\suffix +.Lreturn_double_\suffix: + movq %xmm0, %rax +.Ldone_return_\suffix: + /* resume execution of caller */ + .if \is_string_init + movzwl 4(rPC), %r11d // arguments + andq $$0xf, %r11 + GET_VREG %esi, %r11 + UPDATE_REGISTERS_FOR_STRING_INIT %esi, %eax + .endif + + .if \is_polymorphic + ADVANCE_PC_FETCH_AND_GOTO_NEXT 4 + .else + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + .endif +.endm + +.macro COMMON_INVOKE_RANGE is_static=0, is_interface=0, suffix="", is_string_init=0, is_polymorphic=0, is_custom=0 + .if \is_polymorphic + // We always go to compiled code for polymorphic calls. + .elseif \is_custom + // We always go to compiled code for custom calls. + .else + DO_ENTRY_POINT_CHECK .Lcall_compiled_code_range_\suffix + .if \is_string_init + call nterp_to_nterp_string_init_range + .elseif \is_static + call nterp_to_nterp_static_range + .else + call nterp_to_nterp_instance_range + .endif + jmp .Ldone_return_range_\suffix + .endif + +.Lcall_compiled_code_range_\suffix: + GET_SHORTY rINSTq, \is_interface, \is_polymorphic, \is_custom + // From this point: + // - rINSTq contains shorty (in callee-save to switch over return value after call). + // - rdi contains method + // - rsi contains 'this' pointer for instance method. 
+ leaq 1(rINSTq), %r9 // shorty + 1 ; ie skip return arg character + movzwl 4(rPC), %r10d // arg start index + .if \is_string_init + addq $$1, %r10 // arg start index + movq $$1, %rbp // index in stack + .elseif \is_static + movq $$0, %rbp // index in stack + .else + addq $$1, %r10 // arg start index + movq $$1, %rbp // index in stack + .endif + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm0, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm1, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm2, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm3, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm4, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm5, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm6, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_XMMS xmm7, r9, r10, rbp, .Lxmm_setup_finished_range_\suffix + LOOP_RANGE_OVER_FPs r9, r10, rbp, .Lxmm_setup_finished_range_\suffix +.Lxmm_setup_finished_range_\suffix: + leaq 1(%rbx), %r11 // shorty + 1 ; ie skip return arg character + movzwl 4(rPC), %r10d // arg start index + .if \is_string_init + addq $$1, %r10 // arg start index + movq $$1, %rbp // index in stack + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS rsi, esi, r11, r10, rbp, .Lgpr_setup_finished_\suffix + .elseif \is_static + movq $$0, %rbp // index in stack + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS rsi, esi, r11, r10, rbp, .Lgpr_setup_finished_\suffix + .else + addq $$1, %r10 // arg start index + movq $$1, %rbp // index in stack + .endif + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS rdx, edx, r11, r10, rbp, .Lgpr_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS rcx, ecx, r11, r10, rbp, .Lgpr_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r8, r8d, r11, r10, rbp, .Lgpr_setup_finished_range_\suffix + LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r9, r9d, r11, r10, rbp, .Lgpr_setup_finished_range_\suffix + LOOP_RANGE_OVER_INTs r11, r10, rbp, .Lgpr_setup_finished_range_\suffix + +.Lgpr_setup_finished_range_\suffix: + .if \is_polymorphic + call SYMBOL(art_quick_invoke_polymorphic) + .elseif \is_custom + call SYMBOL(art_quick_invoke_custom) + .else + .if \is_interface + movzwl 2(rPC), %eax + .endif + call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method. + .endif + cmpb LITERAL(68), (%rbx) // Test if result type char == 'D'. + je .Lreturn_range_double_\suffix + cmpb LITERAL(70), (%rbx) // Test if result type char == 'F'. + je .Lreturn_range_float_\suffix + /* resume execution of caller */ +.Ldone_return_range_\suffix: + .if \is_string_init + movzwl 4(rPC), %r11d // arguments + GET_VREG %esi, %r11 + UPDATE_REGISTERS_FOR_STRING_INIT %esi, %eax + .endif + + .if \is_polymorphic + ADVANCE_PC_FETCH_AND_GOTO_NEXT 4 + .else + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + .endif +.Lreturn_range_double_\suffix: + movq %xmm0, %rax + jmp .Ldone_return_range_\suffix +.Lreturn_range_float_\suffix: + movd %xmm0, %eax + jmp .Ldone_return_range_\suffix +.endm + +// Fetch some information from the thread cache. +// Uses rax, rdx, rcx as temporaries. 
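Editor's note: conceptually, the macro below performs a lookup in a small per-thread, direct-mapped table keyed by the dex PC; a hit returns the cached resolution result and a miss falls through to a runtime call that refills the entry. A rough model in C++ (the table size and index function here are placeholders, not ART's actual constants or hash):

#include <cstdint>
#include <cstddef>

class InterpreterCacheSketch {
 public:
  bool Get(const void* dex_pc, uintptr_t* out_value) const {
    const Entry& e = entries_[IndexOf(dex_pc)];
    if (e.key != dex_pc) return false;   // miss: caller takes the slow path
    *out_value = e.value;
    return true;
  }
  void Set(const void* dex_pc, uintptr_t value) {
    Entry& e = entries_[IndexOf(dex_pc)];
    e.key = dex_pc;
    e.value = value;
  }
 private:
  struct Entry { const void* key = nullptr; uintptr_t value = 0; };
  static constexpr size_t kSize = 256;   // placeholder; must be a power of two
  static size_t IndexOf(const void* dex_pc) {
    return (reinterpret_cast<uintptr_t>(dex_pc) >> 2) & (kSize - 1);  // placeholder hash
  }
  Entry entries_[kSize];
};

int main() {
  InterpreterCacheSketch cache;
  uintptr_t v = 0;
  cache.Set(&cache, 42);
  return (cache.Get(&cache, &v) && v == 42) ? 0 : 1;
}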
+.macro FETCH_FROM_THREAD_CACHE dest_reg, slow_path + movq rSELF:THREAD_SELF_OFFSET, %rax + movq rPC, %rdx + salq MACRO_LITERAL(THREAD_INTERPRETER_CACHE_SIZE_SHIFT), %rdx + andq MACRO_LITERAL(THREAD_INTERPRETER_CACHE_SIZE_MASK), %rdx + cmpq THREAD_INTERPRETER_CACHE_OFFSET(%rax, %rdx, 1), rPC + jne \slow_path + movq __SIZEOF_POINTER__+THREAD_INTERPRETER_CACHE_OFFSET(%rax, %rdx, 1), \dest_reg +.endm + +// Helper for static field get. +.macro OP_SGET load="movl", wide="0" + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +4: + .if \wide + movq (%eax,%edx,1), %rax + SET_WIDE_VREG %rax, rINSTq # fp[A] <- value + .else + \load (%eax, %edx, 1), %eax + SET_VREG %eax, rINSTq # fp[A] <- value + .endif + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_static_field + // Clear the marker that we put for volatile fields. The x86 memory + // model doesn't require a barrier. + andq $$-2, %rax + jmp 1b +3: + call art_quick_read_barrier_mark_reg00 + jmp 4b +.endm + +// Helper for static field put. +.macro OP_SPUT rINST_reg="rINST", store="movl", wide="0": + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +4: + .if \wide + GET_WIDE_VREG rINSTq, rINSTq # rINST <- v[A] + .else + GET_VREG rINST, rINSTq # rINST <- v[A] + .endif + \store \rINST_reg, (%rax,%rdx,1) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_static_field + testq MACRO_LITERAL(1), %rax + je 1b + // Clear the marker that we put for volatile fields. The x86 memory + // model doesn't require a barrier. + CLEAR_VOLATILE_MARKER %rax + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 6f +5: + .if \wide + GET_WIDE_VREG rINSTq, rINSTq # rINST <- v[A] + .else + GET_VREG rINST, rINSTq # rINST <- v[A] + .endif + \store \rINST_reg, (%rax,%rdx,1) + lock addl $$0, (%rsp) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +3: + call art_quick_read_barrier_mark_reg00 + jmp 4b +6: + call art_quick_read_barrier_mark_reg00 + jmp 5b +.endm + + +.macro OP_IPUT_INTERNAL rINST_reg="rINST", store="movl", wide="0": + movzbq rINSTbl, %rcx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + andb $$0xf, rINSTbl # rINST <- A + .if \wide + GET_WIDE_VREG rINSTq, rINSTq # rax<- fp[A]/fp[A+1] + .else + GET_VREG rINST, rINSTq # rINST <- v[A] + .endif + \store \rINST_reg, (%rcx,%rax,1) +.endm + +// Helper for instance field put. +.macro OP_IPUT rINST_reg="rINST", store="movl", wide="0": + // Fast-path which gets the field from thread-local cache. 
+ FETCH_FROM_THREAD_CACHE %rax, 2f +1: + OP_IPUT_INTERNAL \rINST_reg, \store, \wide + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_instance_field_offset + testl %eax, %eax + jns 1b + negl %eax + OP_IPUT_INTERNAL \rINST_reg, \store, \wide + lock addl $$0, (%rsp) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +.endm + +// Helper for instance field get. +.macro OP_IGET load="movl", wide="0" + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl rINST, %ecx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + andb $$0xf,rINSTbl # rINST <- A + .if \wide + movq (%rcx,%rax,1), %rax + SET_WIDE_VREG %rax, rINSTq # fp[A] <- value + .else + \load (%rcx,%rax,1), %eax + SET_VREG %eax, rINSTq # fp[A] <- value + .endif + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_instance_field_offset + testl %eax, %eax + jns 1b + negl %eax + jmp 1b +.endm + +%def entry(): +/* + * ArtMethod entry point. + * + * On entry: + * rdi ArtMethod* callee + * rest method parameters + */ + +OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl + .cfi_startproc + .cfi_def_cfa rsp, 8 + testq %rax, -STACK_OVERFLOW_RESERVED_BYTES(%rsp) + /* Spill callee save regs */ + SPILL_ALL_CALLEE_SAVES + + // TODO: Get shorty in a better way and remove below + PUSH rdi + PUSH rsi + PUSH rdx + PUSH rcx + PUSH r8 + PUSH r9 + + // Save xmm registers + alignment. + subq MACRO_LITERAL(8 * 8 + 8), %rsp + CFI_ADJUST_CFA_OFFSET(8 * 8 + 8) + movq %xmm0, 0(%rsp) + movq %xmm1, 8(%rsp) + movq %xmm2, 16(%rsp) + movq %xmm3, 24(%rsp) + movq %xmm4, 32(%rsp) + movq %xmm5, 40(%rsp) + movq %xmm6, 48(%rsp) + movq %xmm7, 56(%rsp) + + // Save method in callee-save rbx. + movq %rdi, %rbx + call SYMBOL(NterpGetShorty) + // Save shorty in callee-save rbp. + movq %rax, %rbp + movq %rbx, %rdi + call SYMBOL(NterpGetCodeItem) + movq %rax, rPC + + // Restore xmm registers _ alignment. + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + addq MACRO_LITERAL(8 * 8 + 8), %rsp + CFI_ADJUST_CFA_OFFSET(-8 * 8 - 8) + + POP r9 + POP r8 + POP rcx + POP rdx + POP rsi + POP rdi + // TODO: Get shorty in a better way and remove above + + movq %rsp, %r14 // Save stack pointer + CFI_DEF_CFA_REGISTER(r14) + + // Create space for registers * 2. Set rFP and rRefs. + movzwl CODE_ITEM_REGISTERS_SIZE_OFFSET(rPC), %ebx + sall $$2, %ebx + subq %rbx, %rsp + movq %rsp, rFP + subq %rbx, %rsp + movq %rsp, rREFS + // Put nulls in reference frame. + testl %ebx, %ebx + je .Ldone_clearing_references + movq rREFS, %r11 +.Lclear_references: + movl $$0, (%r11) + addq $$4, %r11 + cmpq %r11, rFP + jne .Lclear_references +.Ldone_clearing_references: + + // Create space for the previous frame, saved pc, and method being called + subq $$24, %rsp + + // Save the previous frame. + movq %r14, -8(rREFS) + CFI_DEFINE_CFA_DEREF(CFI_REFS, -8, (6 + 4 + 1) * 8) + + // Take space for outs. + movzwl CODE_ITEM_OUTS_SIZE_OFFSET(rPC), %r11d + sall $$2, %r11d + subq %r11, %rsp + + // Align stack pointer to 16. + andq $$-16, %rsp + + // Save the ArtMethod. 
+ movq %rdi, (%rsp) + + // Setup the parameters + movzwl CODE_ITEM_INS_SIZE_OFFSET(rPC), %r11d + testl %r11d, %r11d + je .Lgpr_setup_finished + + sall $$2, %r11d + subq %r11, %rbx // rbx is now the offset for inputs into the registers array. + + // Available r11, rbx, rdi, r10 + testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi) + // Note the leaq below don't change the flags. + leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character + leaq (rFP, %rbx, 1), %rdi + leaq (rREFS, %rbx, 1), %rbx + jne .Lhandle_static_method + movl %esi, (%rdi) + movl %esi, (%rbx) + addq $$4, %rdi + addq $$4, %rbx + addq $$4, %r14 + movq $$0, %r11 + jmp .Lcontinue_setup_gprs +.Lhandle_static_method: + movq $$0, %r11 + LOOP_OVER_SHORTY_STORING_GPRS rsi, esi, r10, r11, rdi, rbx, .Lgpr_setup_finished +.Lcontinue_setup_gprs: + LOOP_OVER_SHORTY_STORING_GPRS rdx, edx, r10, r11, rdi, rbx, .Lgpr_setup_finished + LOOP_OVER_SHORTY_STORING_GPRS rcx, ecx, r10, r11, rdi, rbx, .Lgpr_setup_finished + LOOP_OVER_SHORTY_STORING_GPRS r8, r8d, r10, r11, rdi, rbx, .Lgpr_setup_finished + LOOP_OVER_SHORTY_STORING_GPRS r9, r9d, r10, r11, rdi, rbx, .Lgpr_setup_finished + LOOP_OVER_INTs r10, r11, rdi, rbx, r14, .Lgpr_setup_finished +.Lgpr_setup_finished: + leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character + movq $$0, %r11 // reset counter + LOOP_OVER_SHORTY_STORING_XMMS xmm0, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm1, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm2, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm3, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm4, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm5, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm6, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_SHORTY_STORING_XMMS xmm7, r10, r11, rdi, .Lxmm_setup_finished + LOOP_OVER_FPs r10, r11, rdi, r14, .Lxmm_setup_finished +.Lxmm_setup_finished: + // Set the dex pc pointer. + addq $$CODE_ITEM_INSNS_OFFSET, rPC + CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0) + + // Set rIBASE + leaq artNterpAsmInstructionStart(%rip), rIBASE + /* start executing the instruction at rPC */ + FETCH_INST + GOTO_NEXT + /* NOTE: no fallthrough */ + // cfi info continues, and covers the whole nterp implementation. + END ExecuteNterpImpl + +%def opcode_pre(): + +%def helpers(): + +%def footer(): +/* + * =========================================================================== + * Common subroutines and data + * =========================================================================== + */ + + .text + .align 2 + +// Note: mterp also uses the common_* names below for helpers, but that's OK +// as the C compiler compiled each interpreter separately. 
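Editor's note, before the shared helpers below: the parameter copy set up just above relies on the fact that incoming arguments occupy the last `ins` dex registers of a frame, which is what the `subq %r11, %rbx` offset computation encodes. A tiny standalone illustration:

#include <cstdio>

int FirstInputVReg(int registers_size, int ins_size) {
  return registers_size - ins_size;
}

int main() {
  // A method with 6 registers and 2 ins keeps its parameters in v4 and v5.
  std::printf("%d\n", FirstInputVReg(6, 2));
}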
+common_errDivideByZero: + EXPORT_PC + call art_quick_throw_div_zero + +common_errArrayIndex: + EXPORT_PC + movl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %eax + movl %esi, %edi + movl %eax, %esi + call art_quick_throw_array_bounds + +common_errNullObject: + EXPORT_PC + call art_quick_throw_null_pointer_exception + +NterpCommonInvokeStatic: + COMMON_INVOKE_NON_RANGE is_static=1, is_interface=0, suffix="invokeStatic" + +NterpCommonInvokeStaticRange: + COMMON_INVOKE_RANGE is_static=1, is_interface=0, suffix="invokeStatic" + +NterpCommonInvokeInstance: + COMMON_INVOKE_NON_RANGE is_static=0, is_interface=0, suffix="invokeInstance" + +NterpCommonInvokeInstanceRange: + COMMON_INVOKE_RANGE is_static=0, is_interface=0, suffix="invokeInstance" + +NterpCommonInvokeInterface: + COMMON_INVOKE_NON_RANGE is_static=0, is_interface=1, suffix="invokeInterface" + +NterpCommonInvokeInterfaceRange: + COMMON_INVOKE_RANGE is_static=0, is_interface=1, suffix="invokeInterface" + +NterpCommonInvokePolymorphic: + COMMON_INVOKE_NON_RANGE is_static=0, is_interface=0, is_string_init=0, is_polymorphic=1, suffix="invokePolymorphic" + +NterpCommonInvokePolymorphicRange: + COMMON_INVOKE_RANGE is_static=0, is_interface=0, is_polymorphic=1, suffix="invokePolymorphic" + +NterpCommonInvokeCustom: + COMMON_INVOKE_NON_RANGE is_static=1, is_interface=0, is_string_init=0, is_polymorphic=0, is_custom=1, suffix="invokeCustom" + +NterpCommonInvokeCustomRange: + COMMON_INVOKE_RANGE is_static=1, is_interface=0, is_polymorphic=0, is_custom=1, suffix="invokeCustom" + +NterpHandleStringInit: + COMMON_INVOKE_NON_RANGE is_static=0, is_interface=0, is_string_init=1, suffix="stringInit" + +NterpHandleStringInitRange: + COMMON_INVOKE_RANGE is_static=0, is_interface=0, is_string_init=1, suffix="stringInit" + +NterpNewInstance: + EXPORT_PC + // Fast-path which gets the class from thread-local cache. + FETCH_FROM_THREAD_CACHE %rdi, 2f + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +4: + callq *rSELF:THREAD_ALLOC_OBJECT_ENTRYPOINT_OFFSET +1: + SET_VREG_OBJECT %eax, rINSTq # fp[A] <- value + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_class_or_allocate_object + jmp 1b +3: + // 07 is %rdi + call art_quick_read_barrier_mark_reg07 + jmp 4b + +NterpNewArray: + /* new-array vA, vB, class@CCCC */ + EXPORT_PC + // Fast-path which gets the class from thread-local cache. + FETCH_FROM_THREAD_CACHE %rdi, 2f + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +1: + movzbl rINSTbl,%esi + sarl $$4,%esi # esi<- B + GET_VREG %esi %rsi # esi<- vB (array length) + andb $$0xf,rINSTbl # rINST<- A + callq *rSELF:THREAD_ALLOC_ARRAY_ENTRYPOINT_OFFSET + SET_VREG_OBJECT %eax, rINSTq # fp[A] <- value + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_class_or_allocate_object + movq %rax, %rdi + jmp 1b +3: + // 07 is %rdi + call art_quick_read_barrier_mark_reg07 + jmp 1b + +NterpPutObjectInstanceField: + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movzbq rINSTbl, %rcx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? 
+ je common_errNullObject + andb $$0xf, rINSTbl # rINST <- A + GET_VREG rINST, rINSTq # rINST <- v[A] + movl rINST, (%rcx,%rax,1) + testl rINST, rINST + je 4f + movq rSELF:THREAD_CARD_TABLE_OFFSET, %rax + shrq $$CARD_TABLE_CARD_SHIFT, %rcx + movb %al, (%rax, %rcx, 1) +4: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + EXPORT_PC + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_instance_field_offset + testl %eax, %eax + jns 1b + negl %eax + movzbq rINSTbl, %rcx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + andb $$0xf, rINSTbl # rINST <- A + GET_VREG rINST, rINSTq # rINST <- v[A] + movl rINST, (%rcx,%rax,1) + testl rINST, rINST + je 5f + movq rSELF:THREAD_CARD_TABLE_OFFSET, %rax + shrq $$CARD_TABLE_CARD_SHIFT, %rcx + movb %al, (%rcx, %rax, 1) +5: + lock addl $$0, (%rsp) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +NterpGetObjectInstanceField: + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl rINST, %ecx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + testb $$READ_BARRIER_TEST_VALUE, GRAY_BYTE_OFFSET(%ecx) + movl (%rcx,%rax,1), %eax + jnz 3f +4: + andb $$0xf,rINSTbl # rINST <- A + SET_VREG_OBJECT %eax, rINSTq # fp[A] <- value + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + EXPORT_PC + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_instance_field_offset + testl %eax, %eax + jns 1b + // For volatile fields, we return a negative offset. Remove the sign + // and no need for any barrier thanks to the memory model. + negl %eax + jmp 1b +3: + // reg00 is eax + call art_quick_read_barrier_mark_reg00 + jmp 4b + +NterpPutObjectStaticField: + // Fast-path which gets the field from thread-local cache. + FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +5: + GET_VREG %ecx, rINSTq + movl %ecx, (%eax, %edx, 1) + testl %ecx, %ecx + je 4f + movq rSELF:THREAD_CARD_TABLE_OFFSET, %rcx + shrq $$CARD_TABLE_CARD_SHIFT, %rax + movb %cl, (%rax, %rcx, 1) +4: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_static_field + testq MACRO_LITERAL(1), %rax + je 1b + CLEAR_VOLATILE_MARKER %rax + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 7f +6: + movzbl rINSTbl, %ecx + GET_VREG %ecx, %rcx + movl %ecx, (%eax, %edx, 1) + testl %ecx, %ecx + je 8f + movq rSELF:THREAD_CARD_TABLE_OFFSET, %rcx + shrq $$CARD_TABLE_CARD_SHIFT, %rax + movb %cl, (%rax, %rcx, 1) +8: + lock addl $$0, (%rsp) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +3: + call art_quick_read_barrier_mark_reg00 + jmp 5b +7: + call art_quick_read_barrier_mark_reg00 + jmp 6b + +NterpGetObjectStaticField: + // Fast-path which gets the field from thread-local cache. 
+ FETCH_FROM_THREAD_CACHE %rax, 2f +1: + movl ART_FIELD_OFFSET_OFFSET(%rax), %edx + movl ART_FIELD_DECLARING_CLASS_OFFSET(%rax), %eax + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 5f +6: + testb $$READ_BARRIER_TEST_VALUE, GRAY_BYTE_OFFSET(%eax) + movl (%eax, %edx, 1), %eax + jnz 3f +4: + SET_VREG_OBJECT %eax, rINSTq # fp[A] <- value + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + EXPORT_PC + call nterp_get_static_field + andq $$-2, %rax + jmp 1b +3: + call art_quick_read_barrier_mark_reg00 + jmp 4b +5: + call art_quick_read_barrier_mark_reg00 + jmp 6b + +NterpGetBooleanStaticField: + OP_SGET load="movsbl", wide=0 + +NterpGetByteStaticField: + OP_SGET load="movsbl", wide=0 + +NterpGetCharStaticField: + OP_SGET load="movzwl", wide=0 + +NterpGetShortStaticField: + OP_SGET load="movswl", wide=0 + +NterpGetWideStaticField: + OP_SGET load="movq", wide=1 + +NterpGetIntStaticField: + OP_SGET load="movl", wide=0 + +NterpPutStaticField: + OP_SPUT rINST_reg=rINST, store="movl", wide=0 + +NterpPutBooleanStaticField: +NterpPutByteStaticField: + OP_SPUT rINST_reg=rINSTbl, store="movb", wide=0 + +NterpPutCharStaticField: +NterpPutShortStaticField: + OP_SPUT rINST_reg=rINSTw, store="movw", wide=0 + +NterpPutWideStaticField: + OP_SPUT rINST_reg=rINSTq, store="movq", wide=1 + +NterpPutInstanceField: + OP_IPUT rINST_reg=rINST, store="movl", wide=0 + +NterpPutBooleanInstanceField: +NterpPutByteInstanceField: + OP_IPUT rINST_reg=rINSTbl, store="movb", wide=0 + +NterpPutCharInstanceField: +NterpPutShortInstanceField: + OP_IPUT rINST_reg=rINSTw, store="movw", wide=0 + +NterpPutWideInstanceField: + OP_IPUT rINST_reg=rINSTq, store="movq", wide=1 + +NterpGetBooleanInstanceField: + OP_IGET load="movzbl", wide=0 + +NterpGetByteInstanceField: + OP_IGET load="movsbl", wide=0 + +NterpGetCharInstanceField: + OP_IGET load="movzwl", wide=0 + +NterpGetShortInstanceField: + OP_IGET load="movswl", wide=0 + +NterpGetWideInstanceField: + OP_IGET load="movq", wide=1 + +NterpGetInstanceField: + OP_IGET load="movl", wide=0 + +NterpInstanceOf: + /* instance-of vA, vB, class@CCCC */ + // Fast-path which gets the class from thread-local cache. + EXPORT_PC + FETCH_FROM_THREAD_CACHE %rsi, 2f + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 5f +1: + movzbl rINSTbl,%edi + sarl $$4,%edi # edi<- B + GET_VREG %edi %rdi # edi<- vB (object) + andb $$0xf,rINSTbl # rINST<- A + testl %edi, %edi + je 3f + call art_quick_instance_of + SET_VREG %eax, rINSTq # fp[A] <- value +4: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +3: + SET_VREG %edi, rINSTq # fp[A] <-0 + jmp 4b +2: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_class_or_allocate_object + movq %rax, %rsi + jmp 1b +5: + // 06 is %rsi + call art_quick_read_barrier_mark_reg06 + jmp 1b + +NterpCheckCast: + // Fast-path which gets the class from thread-local cache. 
+ EXPORT_PC + FETCH_FROM_THREAD_CACHE %rsi, 3f + cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 4f +1: + GET_VREG %edi, rINSTq + testl %edi, %edi + je 2f + call art_quick_check_instance_of +2: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +3: + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call nterp_get_class_or_allocate_object + movq %rax, %rsi + jmp 1b +4: + // 06 is %rsi + call art_quick_read_barrier_mark_reg06 + jmp 1b + +NterpHandleHotnessOverflow: + leaq (rPC, rINSTq, 2), %rsi + movq rFP, %rdx + call nterp_hot_method + testq %rax, %rax + jne 1f + leaq (rPC, rINSTq, 2), rPC + FETCH_INST + GOTO_NEXT +1: + // Drop the current frame. + movq -8(rREFS), %rsp + CFI_DEF_CFA(rsp, CALLEE_SAVES_SIZE) + + // Setup the new frame + movq OSR_DATA_FRAME_SIZE(%rax), %rcx + // Given stack size contains all callee saved registers, remove them. + subq $$CALLEE_SAVES_SIZE, %rcx + + // Remember CFA. + movq %rsp, %rbp + CFI_DEF_CFA_REGISTER(rbp) + + subq %rcx, %rsp + movq %rsp, %rdi // rdi := beginning of stack + leaq OSR_DATA_MEMORY(%rax), %rsi // rsi := memory to copy + rep movsb // while (rcx--) { *rdi++ = *rsi++ } + + // Fetch the native PC to jump to and save it in a callee-save register. + movq OSR_DATA_NATIVE_PC(%rax), %rbx + + // Free the memory holding OSR Data. + movq %rax, %rdi + call free + + // Jump to the compiled code. + jmp *%rbx + +// This is the logical end of ExecuteNterpImpl, where the frame info applies. +// EndExecuteNterpImpl includes the methods below as we want the runtime to +// see them as part of the Nterp PCs. +.cfi_endproc + +nterp_to_nterp_static_non_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=1, is_string_init=0 + .cfi_endproc + +nterp_to_nterp_string_init_non_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=1 + .cfi_endproc + +nterp_to_nterp_instance_non_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0 + .cfi_endproc + +nterp_to_nterp_static_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=1 + .cfi_endproc + +nterp_to_nterp_instance_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0 + .cfi_endproc + +nterp_to_nterp_string_init_range: + .cfi_startproc + .cfi_def_cfa rsp, 8 + SETUP_STACK_FOR_INVOKE + SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=1 + .cfi_endproc + +// This is the end of PCs contained by the OatQuickMethodHeader created for the interpreter +// entry point. + FUNCTION_TYPE(EndExecuteNterpImpl) + ASM_HIDDEN SYMBOL(EndExecuteNterpImpl) + .global SYMBOL(EndExecuteNterpImpl) +SYMBOL(EndExecuteNterpImpl): + +// Entrypoints into runtime. 
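Editor's note: several of the trampolines listed next hand back a tagged pointer — nterp_get_static_field sets bit 0 of the returned field pointer when the field is volatile, and the fast paths strip it with CLEAR_VOLATILE_MARKER (andq $-2) before use. A standalone sketch of that low-bit tagging (Field is a stand-in type, not ArtField):

#include <cassert>
#include <cstdint>

struct Field { int offset; };

uintptr_t EncodeField(const Field* f, bool is_volatile) {
  return reinterpret_cast<uintptr_t>(f) | (is_volatile ? 1u : 0u);  // safe: Field is >= 2-byte aligned
}

bool IsVolatile(uintptr_t encoded) { return (encoded & 1u) != 0; }

const Field* DecodeField(uintptr_t encoded) {
  return reinterpret_cast<const Field*>(encoded & ~uintptr_t{1});   // the CLEAR_VOLATILE_MARKER step
}

int main() {
  Field f{16};
  uintptr_t tagged = EncodeField(&f, /*is_volatile=*/true);
  assert(IsVolatile(tagged) && DecodeField(tagged)->offset == 16);
}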
+NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField +NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset +NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray +NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange +NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject +NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod +NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod +NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject + +// gen_mterp.py will inline the following definitions +// within [ExecuteNterpImpl, EndExecuteNterpImpl). +%def instruction_end(): + + FUNCTION_TYPE(artNterpAsmInstructionEnd) + ASM_HIDDEN SYMBOL(artNterpAsmInstructionEnd) + .global SYMBOL(artNterpAsmInstructionEnd) +SYMBOL(artNterpAsmInstructionEnd): + // artNterpAsmInstructionEnd is used as landing pad for exception handling. + FETCH_INST + GOTO_NEXT + +%def instruction_start(): + + FUNCTION_TYPE(artNterpAsmInstructionStart) + ASM_HIDDEN SYMBOL(artNterpAsmInstructionStart) + .global SYMBOL(artNterpAsmInstructionStart) +SYMBOL(artNterpAsmInstructionStart) = .L_op_nop + .text + +%def opcode_start(): + ENTRY nterp_${opcode} +%def opcode_end(): + END nterp_${opcode} +%def helper_start(name): + ENTRY ${name} +%def helper_end(name): + END ${name} diff --git a/runtime/interpreter/mterp/x86_64ng/object.S b/runtime/interpreter/mterp/x86_64ng/object.S new file mode 100644 index 0000000000..cb231e3b7e --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/object.S @@ -0,0 +1,204 @@ +%def op_check_cast(): + jmp NterpCheckCast + +%def op_iget_boolean(): + jmp NterpGetBooleanInstanceField + +%def op_iget_boolean_quick(): +% op_iget_quick(load="movsbl") + +%def op_iget_byte(): + jmp NterpGetByteInstanceField + +%def op_iget_byte_quick(): +% op_iget_quick(load="movsbl") + +%def op_iget_char(): + jmp NterpGetCharInstanceField + +%def op_iget_char_quick(): +% op_iget_quick(load="movzwl") + +%def op_iget_object(): + jmp NterpGetObjectInstanceField + +%def op_iget_object_quick(): + movzwq 2(rPC), %rax # eax <- field byte offset + movl rINST, %ecx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + testb $$READ_BARRIER_TEST_VALUE, GRAY_BYTE_OFFSET(%ecx) + movl (%rcx,%rax,1), %eax + jnz 2f +1: + andb $$0xf,rINSTbl # rINST <- A + SET_VREG_OBJECT %eax, rINSTq # fp[A] <- value + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 +2: + // reg00 is eax + call art_quick_read_barrier_mark_reg00 + jmp 1b + +%def op_iget_quick(load="movl", wide="0"): + /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick */ + /* op vA, vB, offset@CCCC */ + movl rINST, %ecx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + movzwq 2(rPC), %rax # eax <- field byte offset + testl %ecx, %ecx # is object null? 
+ je common_errNullObject + andb $$0xf,rINSTbl # rINST <- A + .if $wide + movq (%rcx,%rax,1), %rax + SET_WIDE_VREG %rax, rINSTq # fp[A] <- value + .else + ${load} (%rcx,%rax,1), %eax + SET_VREG %eax, rINSTq # fp[A] <- value + .endif + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_iget_short(): + jmp NterpGetShortInstanceField + +%def op_iget_short_quick(): +% op_iget_quick(load="movswl") + +%def op_iget_wide(): + jmp NterpGetWideInstanceField + +%def op_iget_wide_quick(): +% op_iget_quick(load="movq", wide="1") + +%def op_instance_of(): + jmp NterpInstanceOf + +%def op_iget(): + jmp NterpGetInstanceField + +%def op_iput(): + jmp NterpPutInstanceField + +%def op_iput_boolean(): + jmp NterpPutBooleanInstanceField + +%def op_iput_boolean_quick(): +% op_iput_quick(reg="rINSTbl", store="movb") + +%def op_iput_byte(): + jmp NterpPutByteInstanceField + +%def op_iput_byte_quick(): +% op_iput_quick(reg="rINSTbl", store="movb") + +%def op_iput_char(): + jmp NterpPutCharInstanceField + +%def op_iput_char_quick(): +% op_iput_quick(reg="rINSTw", store="movw") + +%def op_iput_object(): + jmp NterpPutObjectInstanceField + +%def op_iput_object_quick(): + movzwq 2(rPC), %rax # eax <- field byte offset + movzbq rINSTbl, %rcx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + andb $$0xf, rINSTbl # rINST <- A + GET_VREG rINST, rINSTq # rINST <- v[A] + movl rINST, (%rcx,%rax,1) + testl rINST, rINST + je 1f + movq rSELF:THREAD_CARD_TABLE_OFFSET, %rax + shrq $$CARD_TABLE_CARD_SHIFT, %rcx + movb %al, (%rcx, %rax, 1) +1: + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_iput_quick(reg="rINST", store="movl"): + /* For: iput-quick, iput-object-quick */ + /* op vA, vB, offset@CCCC */ + movzbq rINSTbl, %rcx # rcx <- BA + sarl $$4, %ecx # ecx <- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? + je common_errNullObject + andb $$0xf, rINSTbl # rINST <- A + GET_VREG rINST, rINSTq # rINST <- v[A] + movzwq 2(rPC), %rax # rax <- field byte offset + ${store} ${reg}, (%rcx,%rax,1) + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_iput_short(): + jmp NterpPutShortInstanceField + +%def op_iput_short_quick(): +% op_iput_quick(reg="rINSTw", store="movw") + +%def op_iput_wide(): + jmp NterpPutWideInstanceField + +%def op_iput_wide_quick(): + /* iput-wide-quick vA, vB, offset@CCCC */ + movzbq rINSTbl, %rcx # rcx<- BA + sarl $$4, %ecx # ecx<- B + GET_VREG %ecx, %rcx # vB (object we're operating on) + testl %ecx, %ecx # is object null? 
+ je common_errNullObject + movzwq 2(rPC), %rax # rax<- field byte offset + leaq (%rcx,%rax,1), %rcx # ecx<- Address of 64-bit target + andb $$0xf, rINSTbl # rINST<- A + GET_WIDE_VREG %rax, rINSTq # rax<- fp[A]/fp[A+1] + movq %rax, (%rcx) # obj.field<- r0/r1 + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_sget(load="movl", wide="0"): + jmp NterpGetIntStaticField + +%def op_sget_boolean(): + jmp NterpGetBooleanStaticField + +%def op_sget_byte(): + jmp NterpGetByteStaticField + +%def op_sget_char(): + jmp NterpGetCharStaticField + +%def op_sget_object(): + jmp NterpGetObjectStaticField + +%def op_sget_short(): + jmp NterpGetShortStaticField + +%def op_sget_wide(): + jmp NterpGetWideStaticField + +%def op_sput(): + jmp NterpPutStaticField + +%def op_sput_boolean(): + jmp NterpPutBooleanStaticField + +%def op_sput_byte(): + jmp NterpPutByteStaticField + +%def op_sput_char(): + jmp NterpPutCharStaticField + +%def op_sput_object(): + jmp NterpPutObjectStaticField + +%def op_sput_short(): + jmp NterpPutShortStaticField + +%def op_sput_wide(): + jmp NterpPutWideStaticField + +%def op_new_instance(): + // The routine is too big to fit in a handler, so jump to it. + jmp NterpNewInstance diff --git a/runtime/interpreter/mterp/x86_64ng/other.S b/runtime/interpreter/mterp/x86_64ng/other.S new file mode 100644 index 0000000000..7d82c3b8cc --- /dev/null +++ b/runtime/interpreter/mterp/x86_64ng/other.S @@ -0,0 +1,273 @@ +%def unused(): + int3 + +%def op_const(): + /* const vAA, #+BBBBbbbb */ + movl 2(rPC), %eax # grab all 32 bits at once + SET_VREG %eax, rINSTq # vAA<- eax + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + +%def op_const_16(): + /* const/16 vAA, #+BBBB */ + movswl 2(rPC), %ecx # ecx <- ssssBBBB + SET_VREG %ecx, rINSTq # vAA <- ssssBBBB + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_const_4(): + /* const/4 vA, #+B */ + movsbl rINSTbl, %eax # eax <-ssssssBx + andl MACRO_LITERAL(0xf), rINST # rINST <- A + sarl MACRO_LITERAL(4), %eax + SET_VREG %eax, rINSTq + ADVANCE_PC_FETCH_AND_GOTO_NEXT 1 + +%def op_const_high16(): + /* const/high16 vAA, #+BBBB0000 */ + movzwl 2(rPC), %eax # eax <- 0000BBBB + sall MACRO_LITERAL(16), %eax # eax <- BBBB0000 + SET_VREG %eax, rINSTq # vAA <- eax + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_const_object(jumbo="0", helper="nterp_load_object"): + // Fast-path which gets the object from thread-local cache. 
+ FETCH_FROM_THREAD_CACHE %rax, 2f + cmpq MACRO_LITERAL(0), rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET + jne 3f +1: + SET_VREG_OBJECT %eax, rINSTq # vAA <- value + .if $jumbo + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + .else + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + .endif +2: + EXPORT_PC + movq rSELF:THREAD_SELF_OFFSET, %rdi + movq 0(%rsp), %rsi + movq rPC, %rdx + call SYMBOL($helper) + jmp 1b +3: + // 00 is %rax + call art_quick_read_barrier_mark_reg00 + jmp 1b + +%def op_const_class(): +% op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object") + +%def op_const_method_handle(): +% op_const_object(jumbo="0") + +%def op_const_method_type(): +% op_const_object(jumbo="0") + +%def op_const_string(): + /* const/string vAA, String@BBBB */ +% op_const_object(jumbo="0") + +%def op_const_string_jumbo(): + /* const/string vAA, String@BBBBBBBB */ +% op_const_object(jumbo="1") + +%def op_const_wide(): + /* const-wide vAA, #+HHHHhhhhBBBBbbbb */ + movq 2(rPC), %rax # rax <- HHHHhhhhBBBBbbbb + SET_WIDE_VREG %rax, rINSTq + ADVANCE_PC_FETCH_AND_GOTO_NEXT 5 + +%def op_const_wide_16(): + /* const-wide/16 vAA, #+BBBB */ + movswq 2(rPC), %rax # rax <- ssssssssssssBBBB + SET_WIDE_VREG %rax, rINSTq # store + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_const_wide_32(): + /* const-wide/32 vAA, #+BBBBbbbb */ + movslq 2(rPC), %rax # eax <- ssssssssBBBBbbbb + SET_WIDE_VREG %rax, rINSTq # store + ADVANCE_PC_FETCH_AND_GOTO_NEXT 3 + +%def op_const_wide_high16(): + /* const-wide/high16 vAA, #+BBBB000000000000 */ + movzwq 2(rPC), %rax # eax <- 000000000000BBBB + salq $$48, %rax # eax <- 00000000BBBB0000 + SET_WIDE_VREG %rax, rINSTq # v[AA+0] <- eax + ADVANCE_PC_FETCH_AND_GOTO_NEXT 2 + +%def op_monitor_enter(): +/* + * Synchronize on an object. + */ + /* monitor-enter vAA */ + EXPORT_PC + GET_VREG %edi, rINSTq + call art_quick_lock_object + ADVANCE_PC_FETCH_AND_GOTO_NEXT 1 + +%def op_monitor_exit(): +/* + * Unlock an object. + * + * Exceptions that occur when unlocking a monitor need to appear as + * if they happened at the following instruction. See the Dalvik + * instruction spec. 
+ */
+ /* monitor-exit vAA */
+ EXPORT_PC
+ GET_VREG %edi, rINSTq
+ call art_quick_unlock_object
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move(is_object="0"):
+ /* for move, move-object, long-to-int */
+ /* op vA, vB */
+ movl rINST, %eax # eax <- BA
+ andb $$0xf, %al # eax <- A
+ shrl $$4, rINST # rINST <- B
+ GET_VREG %edx, rINSTq
+ .if $is_object
+ SET_VREG_OBJECT %edx, %rax # fp[A] <- fp[B]
+ .else
+ SET_VREG %edx, %rax # fp[A] <- fp[B]
+ .endif
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move_16(is_object="0"):
+ /* for: move/16, move-object/16 */
+ /* op vAAAA, vBBBB */
+ movzwq 4(rPC), %rcx # ecx <- BBBB
+ movzwq 2(rPC), %rax # eax <- AAAA
+ GET_VREG %edx, %rcx
+ .if $is_object
+ SET_VREG_OBJECT %edx, %rax # fp[A] <- fp[B]
+ .else
+ SET_VREG %edx, %rax # fp[A] <- fp[B]
+ .endif
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+%def op_move_exception():
+ /* move-exception vAA */
+ movl rSELF:THREAD_EXCEPTION_OFFSET, %eax
+ SET_VREG_OBJECT %eax, rINSTq # fp[AA] <- exception object
+ movl $$0, rSELF:THREAD_EXCEPTION_OFFSET
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move_from16(is_object="0"):
+ /* for: move/from16, move-object/from16 */
+ /* op vAA, vBBBB */
+ movzwq 2(rPC), %rax # eax <- BBBB
+ GET_VREG %edx, %rax # edx <- fp[BBBB]
+ .if $is_object
+ SET_VREG_OBJECT %edx, rINSTq # fp[A] <- fp[B]
+ .else
+ SET_VREG %edx, rINSTq # fp[A] <- fp[B]
+ .endif
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+%def op_move_object():
+% op_move(is_object="1")
+
+%def op_move_object_16():
+% op_move_16(is_object="1")
+
+%def op_move_object_from16():
+% op_move_from16(is_object="1")
+
+%def op_move_result(is_object="0"):
+ /* for: move-result, move-result-object */
+ /* op vAA */
+ .if $is_object
+ SET_VREG_OBJECT %eax, rINSTq # fp[AA] <- result
+ .else
+ SET_VREG %eax, rINSTq # fp[AA] <- result
+ .endif
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move_result_object():
+% op_move_result(is_object="1")
+
+%def op_move_result_wide():
+ /* move-result-wide vAA */
+ SET_WIDE_VREG %rax, rINSTq # v[AA] <- rax
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move_wide():
+ /* move-wide vA, vB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ movl rINST, %ecx # ecx <- BA
+ sarl $$4, rINST # rINST <- B
+ andb $$0xf, %cl # ecx <- A
+ GET_WIDE_VREG %rdx, rINSTq # rdx <- v[B]
+ SET_WIDE_VREG %rdx, %rcx # v[A] <- rdx
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_move_wide_16():
+ /* move-wide/16 vAAAA, vBBBB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ movzwq 4(rPC), %rcx # ecx <- BBBB
+ movzwq 2(rPC), %rax # eax <- AAAA
+ GET_WIDE_VREG %rdx, %rcx # rdx <- v[B]
+ SET_WIDE_VREG %rdx, %rax # v[A] <- rdx
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+%def op_move_wide_from16():
+ /* move-wide/from16 vAA, vBBBB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ movzwl 2(rPC), %ecx # ecx <- BBBB
+ GET_WIDE_VREG %rdx, %rcx # rdx <- v[B]
+ SET_WIDE_VREG %rdx, rINSTq # v[A] <- rdx
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+%def op_nop():
+ ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
+
+%def op_unused_3e():
+% unused()
+
+%def op_unused_3f():
+% unused()
+
+%def op_unused_40():
+% unused()
+
+%def op_unused_41():
+% unused()
+
+%def op_unused_42():
+% unused()
+
+%def op_unused_43():
+% unused()
+
+%def op_unused_79():
+% unused()
+
+%def op_unused_7a():
+% unused()
+
+%def op_unused_f3():
+% unused()
+
+%def op_unused_f4():
+% unused()
+
+%def op_unused_f5():
+% unused()
+
+%def op_unused_f6():
+% unused()
+
+%def op_unused_f7():
+% unused()
+
+%def op_unused_f8():
+% unused()
+
+%def op_unused_f9():
+% unused()
+
+%def op_unused_fc():
+% unused()
+
+%def op_unused_fd():
+% unused()
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 6e89973850..48a51f1c5a 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -1498,6 +1498,9 @@ bool Jit::MaybeCompileMethod(Thread* self,
}

void Jit::EnqueueOptimizedCompilation(ArtMethod* method, Thread* self) {
+ if (thread_pool_ == nullptr) {
+ return;
+ }
// We arrive here after a baseline compiled code has reached its baseline
// hotness threshold. If tiered compilation is enabled, enqueue a compilation
// task that will compile optimize the method.
@@ -1744,5 +1747,21 @@ bool Jit::CanAssumeInitialized(ObjPtr<mirror::Class> cls, bool is_for_shared_reg
}
}

+void Jit::EnqueueCompilationFromNterp(ArtMethod* method, Thread* self) {
+ if (thread_pool_ == nullptr) {
+ return;
+ }
+ if (GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+ // If we already have compiled code for it, nterp may be stuck in a loop.
+ // Compile OSR.
+ thread_pool_->AddTask(
+ self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompileOsr));
+ return;
+ }
+ ProfilingInfo::Create(self, method, /* retry_allocation= */ false);
+ thread_pool_->AddTask(
+ self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompileBaseline));
+}
+
}  // namespace jit
}  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 8d5676be03..e9fd915fc1 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -26,6 +26,7 @@
#include "base/timing_logger.h"
#include "handle.h"
#include "offsets.h"
+#include "interpreter/mterp/mterp.h"
#include "jit/debugger_interface.h"
#include "jit/profile_saver_options.h"
#include "obj_ptr.h"
@@ -120,7 +121,9 @@ class JitOptions {
}

bool CanCompileBaseline() const {
- return use_tiered_jit_compilation_ || use_baseline_compiler_;
+ return use_tiered_jit_compilation_ ||
+ use_baseline_compiler_ ||
+ interpreter::IsNterpSupported();
}

void SetUseJitCompilation(bool b) {
@@ -435,6 +438,9 @@ class Jit {
void EnqueueOptimizedCompilation(ArtMethod* method, Thread* self);

+ void EnqueueCompilationFromNterp(ArtMethod* method, Thread* self)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
private:
Jit(JitCodeCache* code_cache, JitOptions* options);
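Jit::EnqueueCompilationFromNterp above is the hook nterp uses once a method's hotness counter overflows: if compiled code is already in the code cache it requests an OSR compile (the method is probably looping), otherwise it creates profiling info and requests a baseline compile. A rough standalone sketch of that decision, using invented stand-in types rather than ART's real Jit and JitCodeCache classes:

    #include <optional>

    enum class TaskKind { kCompileBaseline, kCompileOsr };

    struct JitModel {
      bool has_thread_pool = true;         // thread_pool_ != nullptr in the real code
      bool code_cache_has_method = false;  // GetCodeCache()->ContainsPc(entry_point)
    };

    // Returns the task that would be enqueued, or nothing if the JIT is not ready.
    std::optional<TaskKind> EnqueueFromNterp(const JitModel& jit) {
      if (!jit.has_thread_pool) {
        return std::nullopt;               // JIT thread pool not created yet
      }
      if (jit.code_cache_has_method) {
        // Already compiled: nterp may be stuck in a loop, so ask for OSR code.
        return TaskKind::kCompileOsr;
      }
      // Otherwise create profiling info (elided here) and compile with baseline.
      return TaskKind::kCompileBaseline;
    }

    int main() {
      return EnqueueFromNterp(JitModel{}) == TaskKind::kCompileBaseline ? 0 : 1;
    }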
diff --git a/runtime/nterp_helpers.cc b/runtime/nterp_helpers.cc
index df0eb7305a..9b265c2cac 100644
--- a/runtime/nterp_helpers.cc
+++ b/runtime/nterp_helpers.cc
@@ -92,7 +92,7 @@ static constexpr size_t NterpGetFrameEntrySize() {
return (POPCOUNT(core_spills) + POPCOUNT(fp_spills)) * kPointerSize;
}

-static size_t NterpGetFrameSize(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
+size_t NterpGetFrameSize(ArtMethod* method) {
CodeItemDataAccessor accessor(method->DexInstructionData());
const uint16_t num_regs = accessor.RegistersSize();
const uint16_t out_regs = accessor.OutsSize();
diff --git a/runtime/nterp_helpers.h b/runtime/nterp_helpers.h
index 758d1fdfc0..7dbf92efaa 100644
--- a/runtime/nterp_helpers.h
+++ b/runtime/nterp_helpers.h
@@ -24,6 +24,12 @@ namespace art {
class ArtMethod;

/**
+ * Returns the frame size nterp will use for the given method.
+ */
+size_t NterpGetFrameSize(ArtMethod* method)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+/**
* Returns the QuickMethodFrameInfo of the given frame corresponding to the
* given method.
*/
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 0e04b7b696..910b389cf3 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -437,7 +437,11 @@ class DeoptimizeStackVisitor final : public StackVisitor {
updated_vregs = GetThread()->GetUpdatedVRegFlags(frame_id);
DCHECK(updated_vregs != nullptr);
}
- HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
+ if (GetCurrentOatQuickMethodHeader()->IsNterpMethodHeader()) {
+ HandleNterpDeoptimization(method, new_frame, updated_vregs);
+ } else {
+ HandleOptimizingDeoptimization(method, new_frame, updated_vregs);
+ }
if (updated_vregs != nullptr) {
// Calling Thread::RemoveDebuggerShadowFrameMapping will also delete the updated_vregs
// array so this must come after we processed the frame.
@@ -467,6 +471,35 @@ class DeoptimizeStackVisitor final : public StackVisitor {
}

private:
+ void HandleNterpDeoptimization(ArtMethod* m,
+ ShadowFrame* new_frame,
+ const bool* updated_vregs)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
+ StackReference<mirror::Object>* vreg_ref_base =
+ reinterpret_cast<StackReference<mirror::Object>*>(NterpGetReferenceArray(cur_quick_frame));
+ int32_t* vreg_int_base =
+ reinterpret_cast<int32_t*>(NterpGetRegistersArray(cur_quick_frame));
+ CodeItemDataAccessor accessor(m->DexInstructionData());
+ const uint16_t num_regs = accessor.RegistersSize();
+ // An nterp frame has two arrays: a dex register array and a reference array
+ // that shadows the dex register array but contains only references
+ // (non-reference dex registers have nulls). See nterp_helpers.cc.
+ for (size_t reg = 0; reg < num_regs; ++reg) {
+ if (updated_vregs != nullptr && updated_vregs[reg]) {
+ // Keep the value set by the debugger.
+ continue;
+ }
+ StackReference<mirror::Object>* ref_addr = vreg_ref_base + reg;
+ mirror::Object* ref = ref_addr->AsMirrorPtr();
+ if (ref != nullptr) {
+ new_frame->SetVRegReference(reg, ref);
+ } else {
+ new_frame->SetVReg(reg, vreg_int_base[reg]);
+ }
+ }
+ }
+
void HandleOptimizingDeoptimization(ArtMethod* m,
ShadowFrame* new_frame,
const bool* updated_vregs)
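The copy loop in HandleNterpDeoptimization depends on the nterp frame layout described in its comment: one 32-bit slot per dex register plus a parallel reference array that is non-null only where the register holds a reference. A simplified, self-contained model of that conversion into a shadow frame (all types below are invented stand-ins, not ART's mirror::Object or ShadowFrame):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Object {};  // stand-in for mirror::Object

    // Model of an nterp frame: a register array plus a shadowing reference array.
    struct NterpFrameModel {
      std::vector<int32_t> regs;
      std::vector<Object*> refs;  // same length as regs, null for non-references
    };

    // Minimal stand-in for the interpreter's ShadowFrame.
    struct ShadowFrameModel {
      std::vector<int32_t> ints;
      std::vector<Object*> objects;
      explicit ShadowFrameModel(size_t n) : ints(n), objects(n, nullptr) {}
      void SetVReg(size_t i, int32_t v) { ints[i] = v; }
      void SetVRegReference(size_t i, Object* o) { objects[i] = o; }
    };

    // Mirrors the copy loop: references win, everything else is copied
    // as raw 32-bit register contents.
    ShadowFrameModel BuildShadowFrame(const NterpFrameModel& frame) {
      ShadowFrameModel out(frame.regs.size());
      for (size_t reg = 0; reg < frame.regs.size(); ++reg) {
        if (frame.refs[reg] != nullptr) {
          out.SetVRegReference(reg, frame.refs[reg]);
        } else {
          out.SetVReg(reg, frame.regs[reg]);
        }
      }
      return out;
    }

    int main() {
      Object obj;
      NterpFrameModel frame{{42, 0}, {nullptr, &obj}};
      ShadowFrameModel sf = BuildShadowFrame(frame);
      return (sf.ints[0] == 42 && sf.objects[1] == &obj) ? 0 : 1;
    }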
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8861a095c7..99980c50b5 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -150,6 +150,7 @@
#include "oat.h"
#include "oat_file.h"
#include "oat_file_manager.h"
+#include "oat_quick_method_header.h"
#include "object_callbacks.h"
#include "parsed_options.h"
#include "quick/quick_method_frame_info.h"
@@ -850,13 +851,15 @@ bool Runtime::Start() {

if (!IsImageDex2OatEnabled() || !GetHeap()->HasBootImageSpace()) {
ScopedObjectAccess soa(self);
- StackHandleScope<2> hs(soa.Self());
+ StackHandleScope<3> hs(soa.Self());
ObjPtr<mirror::ObjectArray<mirror::Class>> class_roots = GetClassLinker()->GetClassRoots();
auto class_class(hs.NewHandle<mirror::Class>(GetClassRoot<mirror::Class>(class_roots)));
+ auto string_class(hs.NewHandle<mirror::Class>(GetClassRoot<mirror::String>(class_roots)));
auto field_class(hs.NewHandle<mirror::Class>(GetClassRoot<mirror::Field>(class_roots)));
class_linker_->EnsureInitialized(soa.Self(), class_class, true, true);
+ class_linker_->EnsureInitialized(soa.Self(), string_class, true, true);
self->AssertNoPendingException();
// Field class is needed for register_java_net_InetAddress in libcore, b/28153851.
class_linker_->EnsureInitialized(soa.Self(), field_class, true, true);
@@ -2727,6 +2730,11 @@ bool Runtime::IsVerificationSoftFail() const {
}

bool Runtime::IsAsyncDeoptimizeable(uintptr_t code) const {
+ if (OatQuickMethodHeader::NterpMethodHeader != nullptr) {
+ if (OatQuickMethodHeader::NterpMethodHeader->Contains(code)) {
+ return true;
+ }
+ }
// We only support async deopt (ie the compiled code is not explicitly asking for
// deopt, but something else like the debugger) in debuggable JIT code.
// We could look at the oat file where `code` is being defined,
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index c91da68387..877a5a0560 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -75,7 +75,7 @@ RUNTIME_OPTIONS_KEY (Unit, LowMemoryMode)
RUNTIME_OPTIONS_KEY (bool, UseTLAB, (kUseTlab || kUseReadBarrier))
RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true)
RUNTIME_OPTIONS_KEY (bool, UseJitCompilation, true)
-RUNTIME_OPTIONS_KEY (bool, UseTieredJitCompilation, false)
+RUNTIME_OPTIONS_KEY (bool, UseTieredJitCompilation, interpreter::IsNterpSupported())
RUNTIME_OPTIONS_KEY (bool, DumpNativeStackOnSigQuit, true)
RUNTIME_OPTIONS_KEY (bool, MadviseRandomAccess, false)
RUNTIME_OPTIONS_KEY (JniIdType, OpaqueJniIds, JniIdType::kDefault)  // -Xopaque-jni-ids:{true, false, swapable}
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 410e0fd144..72690da088 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -748,14 +748,13 @@ void StackVisitor::SanityCheckFrame() const {
// Frame sanity.
size_t frame_size = GetCurrentQuickFrameInfo().FrameSizeInBytes();
CHECK_NE(frame_size, 0u);
- // A rough guess at an upper size we expect to see for a frame.
+ // For compiled code, we could try to have a rough guess at an upper size we expect
+ // to see for a frame:
// 256 registers
// 2 words HandleScope overhead
// 3+3 register spills
- // TODO: this seems architecture specific for the case of JNI frames.
- // TODO: 083-compiler-regressions ManyFloatArgs shows this estimate is wrong.
// const size_t kMaxExpectedFrameSize = (256 + 2 + 3 + 3) * sizeof(word);
- const size_t kMaxExpectedFrameSize = 2 * KB;
+ const size_t kMaxExpectedFrameSize = interpreter::kMaxNterpFrame;
CHECK_LE(frame_size, kMaxExpectedFrameSize) << method->PrettyMethod();
size_t return_pc_offset = GetCurrentQuickFrameInfo().GetReturnPcOffset();
CHECK_LT(return_pc_offset, frame_size);
@@ -852,7 +851,6 @@ void StackVisitor::WalkStack(bool include_transitions) {
cur_quick_frame_ = current_fragment->GetTopQuickFrame();
cur_quick_frame_pc_ = 0;
cur_oat_quick_method_header_ = nullptr;
-
if (cur_quick_frame_ != nullptr) {  // Handle quick stack frames.
// Can't be both a shadow and a quick fragment.
DCHECK(current_fragment->GetTopShadowFrame() == nullptr);
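The runtime.cc hunk above makes any return PC inside the shared nterp code blob asynchronously deoptimizable, since an nterp frame can always be rebuilt as a shadow frame. A rough restatement of that policy (the CodeRange type and parameter names are hypothetical, not ART's OatQuickMethodHeader API):

    #include <cstdint>

    // Stand-in for the [begin, end) range covered by the nterp method header.
    struct CodeRange {
      uintptr_t begin;
      uintptr_t end;
      bool Contains(uintptr_t pc) const { return begin <= pc && pc < end; }
    };

    // Simplified policy: a PC inside nterp is always async-deoptimizable;
    // everything else keeps the existing debuggable-JIT-only rule.
    bool IsAsyncDeoptimizeable(const CodeRange* nterp_range,
                               uintptr_t code,
                               bool is_debuggable_jit_code) {
      if (nterp_range != nullptr && nterp_range->Contains(code)) {
        return true;
      }
      return is_debuggable_jit_code;
    }

    int main() {
      CodeRange nterp{0x1000, 0x2000};
      return IsAsyncDeoptimizeable(&nterp, 0x1800, false) ? 0 : 1;
    }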
diff --git a/test/566-polymorphic-inlining/src/Main.java b/test/566-polymorphic-inlining/src/Main.java
index 793b85f5e5..e34d27a975 100644
--- a/test/566-polymorphic-inlining/src/Main.java
+++ b/test/566-polymorphic-inlining/src/Main.java
@@ -47,7 +47,7 @@ public class Main implements Itf {
// Make testInvokeVirtual and testInvokeInterface hot to get them jitted.
// We pass Main and Subclass to get polymorphic inlining based on calling
// the same method.
- for (int i = 0; i < 10000; ++i) {
+ for (int i = 0; i < 1000000; ++i) {
testInvokeVirtual(mains[0]);
testInvokeVirtual(mains[1]);
testInvokeInterface(itfs[0]);
@@ -78,7 +78,7 @@ public class Main implements Itf {
// Run this once to make sure we execute the JITted code.
$noinline$testInlineToSameTarget(mains[0]);
- assertEquals(20001, counter);
+ assertEquals(2000001, counter);
}

public Class<?> sameInvokeVirtual() {
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index ee978c2675..22423e2b6b 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -90,7 +90,8 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInInterpreter(JNIEnv* env,
const OatQuickMethodHeader* header =
Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
if ((header == nullptr || header != stack_visitor->GetCurrentOatQuickMethodHeader()) &&
- stack_visitor->IsShadowFrame()) {
+ (stack_visitor->IsShadowFrame() ||
+ stack_visitor->GetCurrentOatQuickMethodHeader()->IsNterpMethodHeader())) {
in_interpreter = true;
}
});
diff --git a/test/638-checker-inline-cache-intrinsic/src/Main.java b/test/638-checker-inline-cache-intrinsic/src/Main.java
index 1449f0a867..5334487dfa 100644
--- a/test/638-checker-inline-cache-intrinsic/src/Main.java
+++ b/test/638-checker-inline-cache-intrinsic/src/Main.java
@@ -64,10 +64,10 @@ public class Main {

public static void test() {
// Warm up inline cache.
- for (int i = 0; i < 450; i++) {
+ for (int i = 0; i < 600000; i++) {
$noinline$inlineMonomorphic(str);
}
- for (int i = 0; i < 600; i++) {
+ for (int i = 0; i < 600000; i++) {
$noinline$stringEquals(str);
}
ensureJitCompiled(Main.class, "$noinline$stringEquals");
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 4ca5fe8333..22dbcce018 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -179,7 +179,8 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_isAotCompiled(JNIEnv* env,
}
const void* actual_code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
bool interpreter =
- Runtime::Current()->GetClassLinker()->ShouldUseInterpreterEntrypoint(method, actual_code);
+ Runtime::Current()->GetClassLinker()->ShouldUseInterpreterEntrypoint(method, actual_code) ||
+ (actual_code == interpreter::GetNterpEntryPoint());
return !interpreter;
}
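The runtime_state.cc hunk above widens what the test helpers treat as interpreted: a method whose entry point is the shared nterp entry point is no longer reported as AOT/JIT compiled. A small sketch of that predicate with invented parameter names (the real code queries the class linker and interpreter::GetNterpEntryPoint()):

    using EntryPoint = const void*;

    // Simplified form of the updated Java_Main_isAotCompiled logic: a method
    // does not count as AOT/JIT compiled if the class linker would send it to
    // the switch interpreter or if its entry point is the nterp entry point.
    bool IsAotOrJitCompiled(EntryPoint actual_code,
                            EntryPoint nterp_entry_point,
                            bool should_use_interpreter_entrypoint) {
      bool interpreter =
          should_use_interpreter_entrypoint || (actual_code == nterp_entry_point);
      return !interpreter;
    }

    int main() {
      int dummy_nterp = 0;
      int dummy_code = 0;
      return IsAotOrJitCompiled(&dummy_code, &dummy_nterp, false) ? 0 : 1;
    }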
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index e8160b4322..79c7a36a8e 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -25,6 +25,7 @@
#include "mirror/class-inl.h"
#include "nth_caller_visitor.h"
#include "oat_file.h"
+#include "oat_quick_method_header.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
#include "stack.h"
@@ -47,7 +48,10 @@ static jboolean IsInterpreted(JNIEnv* env, jclass, size_t level) {
NthCallerVisitor caller(soa.Self(), level, false);
caller.WalkStack();
CHECK(caller.caller != nullptr);
- return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
+ bool is_shadow_frame = (caller.GetCurrentShadowFrame() != nullptr);
+ bool is_nterp_frame = (caller.GetCurrentQuickFrame() != nullptr) &&
+ (caller.GetCurrentOatQuickMethodHeader()->IsNterpMethodHeader());
+ return (is_shadow_frame || is_nterp_frame) ? JNI_TRUE : JNI_FALSE;
}

// public static native boolean isInterpreted();
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index 72cd2a98f1..4fee6df57d 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -39,6 +39,8 @@ ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_LOG2,
art::Thread::InterpreterCacheSizeLog2())
ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_MASK,
(sizeof(art::InterpreterCache::Entry) * (art::InterpreterCache::kSize - 1)))
+ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_SHIFT,
+ 2)
ASM_DEFINE(THREAD_IS_GC_MARKING_OFFSET,
art::Thread::IsGcMarkingOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,