Merge "Fixing structure of native frame for Generic JNI"
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index def7b68..d28b0fe 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -219,8 +219,15 @@
       } else {
         const void* method_code = GetQuickGenericJniTrampoline();
         mirror::ArtMethod* callee_save_method = runtime_->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+
+        // Compute the Sirt size, as the Sirt goes into the frame.
+        MethodHelper mh(method);
+        uint32_t sirt_refs = mh.GetNumberOfReferenceArgsWithoutReceiver() + 1;
+        uint32_t sirt_size = StackIndirectReferenceTable::SizeOf(sirt_refs);
+
         OatFile::OatMethod oat_method = CreateOatMethod(method_code,
-                                                        callee_save_method->GetFrameSizeInBytes(),
+                                                        callee_save_method->GetFrameSizeInBytes() +
+                                                            sirt_size,
                                                         callee_save_method->GetCoreSpillMask(),
                                                         callee_save_method->GetFpSpillMask(),
                                                         nullptr,
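
The test harness must now reserve the SIRT inside the frame it reports for generic-JNI-dispatched methods. A minimal, self-contained sketch of that arithmetic (the SIRT layout and the 176-byte callee-save frame below are illustrative assumptions, not the runtime's actual values):

    #include <cstddef>
    #include <cstdint>

    // Stand-in for StackIndirectReferenceTable::SizeOf(): link pointer + reference count
    // + one 4-byte StackReference per entry (hypothetical 64-bit layout).
    static size_t FakeSirtSizeOf(uint32_t refs) {
      return sizeof(void*) + sizeof(uint32_t) + refs * 4u;
    }

    // Frame size handed to CreateOatMethod for a generic JNI stub: the callee-save frame
    // plus room for every reference argument and the receiver/class.
    static size_t GenericJniFrameSize(size_t callee_save_frame_bytes, uint32_t ref_args) {
      return callee_save_frame_bytes + FakeSirtSizeOf(ref_args + 1);  // +1 for this/class
    }

    // Example: a method with one reference argument on a hypothetical 176-byte frame.
    // GenericJniFrameSize(176, 1) == 176 + (8 + 4 + 2 * 4) == 196.
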
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index ffd7b41..c5219a6 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -364,7 +364,7 @@
   OatClass* oat_class = oat_classes_[oat_class_index];
   CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-  if (compiled_method != NULL) {
+  if (compiled_method != nullptr) {
     const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
     const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
     if (portable_code != nullptr) {
@@ -495,6 +495,33 @@
 
 
   if (compiler_driver_->IsImage()) {
+    // Derive frame size and spill masks for native methods without code:
+    // These are generic JNI methods...
+    if (is_native && compiled_method == nullptr) {
+      // Compute the Sirt size assuming _every_ reference goes into it, even null ones.
+      uint32_t s_len;
+      const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx), &s_len);
+      DCHECK(shorty != nullptr);
+      uint32_t refs = 1;    // Native method always has "this" or class.
+      for (uint32_t i = 1; i < s_len; ++i) {
+        if (shorty[i] == 'L') {
+          refs++;
+        }
+      }
+      size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSize(refs);
+
+      // Get the generic spill masks and base frame size.
+      mirror::ArtMethod* callee_save_method =
+          Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
+
+      frame_size_in_bytes = callee_save_method->GetFrameSizeInBytes() + sirt_size;
+      core_spill_mask = callee_save_method->GetCoreSpillMask();
+      fp_spill_mask = callee_save_method->GetFpSpillMask();
+      mapping_table_offset = 0;
+      vmap_table_offset = 0;
+      gc_map_offset = 0;
+    }
+
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
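
The oat writer now derives the SIRT entry count directly from the shorty, since generic JNI reserves a slot for every reference argument (null or not) plus the receiver/class. A sketch of that scan with a made-up shorty (index 0 of a shorty is the return type):

    #include <cstdint>

    // Count SIRT slots for a generic JNI frame from a shorty string (example only).
    static uint32_t GenericJniSirtRefs(const char* shorty, uint32_t len) {
      uint32_t refs = 1;  // The native method always receives "this" or its declaring class.
      for (uint32_t i = 1; i < len; ++i) {  // Skip index 0, the return type.
        if (shorty[i] == 'L') {
          refs++;
        }
      }
      return refs;
    }

    // GenericJniSirtRefs("VLIL", 4) == 3: receiver/class slot + two reference arguments.
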
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 340758f..5fbf8cb 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -711,6 +711,13 @@
  * |                   |
  * | caller method...  |
  * #-------------------#    <--- SP on entry
+ *
+ *          |
+ *          V
+ *
+ * #-------------------#
+ * | caller method...  |
+ * #-------------------#
  * | Return            |
  * | R15               |    callee save
  * | R14               |    callee save
@@ -734,22 +741,7 @@
  * | Padding           |
  * | RDI/Method*       |  <- sp
  * #-------------------#
- * | local ref cookie  | // 4B
- * |   padding         | // 4B
- * #----------#--------#
- * |          |      | |
- * | Temp/    | SIRT | |    Scratch frame is 4k
- * | Scratch  |      v |
- * | Frame    #--------|
- * |                   |
- * |          #--------|
- * |          |      ^ |
- * |          | JNI  | |
- * |          | Stack| |
- * #----------#--------#    <--- SP on native call (needs alignment?)
- * |                   |
- * | Stack for Regs    |    The trampoline assembly will pop these values
- * |                   |    into registers for native call
+ * | Scratch Alloca    |    5K scratch space
  * #---------#---------#
  * |         | sp*     |
  * | Tramp.  #---------#
@@ -757,6 +749,35 @@
  * | Tramp.  #---------#
  * |         | method  |
  * #-------------------#    <--- SP on artQuickGenericJniTrampoline
+ *
+ *           |
+ *           v              artQuickGenericJniTrampoline
+ *
+ * #-------------------#
+ * | caller method...  |
+ * #-------------------#
+ * | Return            |
+ * | Callee-Save Data  |
+ * #-------------------#
+ * | SIRT              |
+ * #-------------------#
+ * | Method*           |    <--- (1)
+ * #-------------------#
+ * | local ref cookie  | // 4B
+ * | SIRT size         | // 4B   TODO: roll into call stack alignment?
+ * #-------------------#
+ * | JNI Call Stack    |
+ * #-------------------#    <--- SP on native call
+ * |                   |
+ * | Stack for Regs    |    The trampoline assembly will pop these values
+ * |                   |    into registers for native call
+ * #-------------------#
+ * | Native code ptr   |
+ * #-------------------#
+ * | Free scratch      |
+ * #-------------------#
+ * | Ptr to (1)        |    <--- RSP
+ * #-------------------#
  */
     /*
      * Called to do a generic JNI down-call
@@ -789,7 +810,8 @@
     // Store native ArtMethod* to bottom of stack.
     movq %rdi, 0(%rsp)
     movq %rsp, %rbp                 // save SP at callee-save frame
-    CFI_DEF_CFA_REGISTER(rbp)
+    movq %rsp, %rbx
+    CFI_DEF_CFA_REGISTER(rbx)
     //
     // reserve a lot of space
     //
@@ -815,12 +837,19 @@
     movq %gs:THREAD_SELF_OFFSET, %rdi
     movq %rbp, %rsi
     call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
-    test %rax, %rax                 // Check for error, negative value.
+
+    // At the bottom of the alloca we now have a pointer to the method (= the bottom of the
+    // callee-save frame). Pop it to get the adjusted frame pointer.
+    popq %rbp
+
+    // Check for error, negative value.
+    test %rax, %rax
     js .Lentry_error
-    // release part of the alloca
+
+    // release part of the alloca, get the code pointer
     addq %rax, %rsp
-    // get the code pointer
     popq %rax
+
     // pop from the register-passing alloca region
     // what's the right layout?
     popq %rdi
@@ -853,7 +882,7 @@
     call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
 
     // Tear down the alloca.
-    movq %rbp, %rsp
+    movq %rbx, %rsp
     CFI_DEF_CFA_REGISTER(rsp)
 
     // Pending exceptions possible.
@@ -891,12 +920,35 @@
     movq %rax, %xmm0
     ret
 .Lentry_error:
-    movq %rbp, %rsp
+    movq %rbx, %rsp
+    CFI_DEF_CFA_REGISTER(rsp)
 .Lexception_in_native:
-    CFI_REL_OFFSET(rsp,176)
     // TODO: the SIRT contains the this pointer which is used by the debugger for exception
     //       delivery.
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    movq %xmm0, 16(%rsp)         // doesn't make sense!!!
+    movq 24(%rsp), %xmm1            // neither does this!!!
+    movq 32(%rsp), %xmm2
+    movq 40(%rsp), %xmm3
+    movq 48(%rsp), %xmm4
+    movq 56(%rsp), %xmm5
+    movq 64(%rsp), %xmm6
+    movq 72(%rsp), %xmm7
+    // was 80 bytes
+    addq LITERAL(80), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80)
+    // Restore callee saves and GPR args, mixed together to agree with core spills bitmap.
+    POP rcx  // Arg.
+    POP rdx  // Arg.
+    POP rbx  // Callee save.
+    POP rbp  // Callee save.
+    POP rsi  // Arg.
+    POP r8   // Arg.
+    POP r9   // Arg.
+    POP r12  // Callee save.
+    POP r13  // Callee save.
+    POP r14  // Callee save.
+    POP r15  // Callee save.
+
     DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_generic_jni_trampoline
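
The popq %rbp added above and the return value of artQuickGenericJniTrampoline must agree on how the 5K alloca is carved up: after popping the stored frame pointer and releasing the unused part, RSP has to land exactly on the saved code pointer. A self-contained sketch of that invariant (the window_size value is made up; only the relationship matters):

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t kKB = 1024;
      const size_t kReserved = 5 * kKB;  // scratch alloca below the callee-save SP
      const size_t kPointerSize = 8;     // x86-64
      const size_t window_size = 344;    // hypothetical overall_size from ComputeLayout

      // What the trampoline returns: how much of the alloca the assembly may release.
      const size_t release = kReserved - window_size - kPointerSize;

      // RSP walk in the assembly: start at (callee-save SP - kReserved), pop the frame
      // pointer (+kPointerSize), then add the return value. It must end up window_size
      // below the callee-save SP, i.e. on the stored native code pointer.
      const size_t rsp_below_callee_save = kReserved - kPointerSize - release;
      assert(rsp_below_callee_save == window_size);
      return 0;
    }
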
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 08de95f..9489d9b 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -856,9 +856,10 @@
  *
  * void PushStack(uintptr_t): Push a value to the stack.
  *
- * uintptr_t PushSirt(mirror::Object* ref): Add a reference to the Sirt. Is guaranteed != nullptr.
+ * uintptr_t PushSirt(mirror::Object* ref): Add a reference to the Sirt. The ref _may_ be nullptr,
+ *                                          as even null references may need their slot initialized.
  *                                          Must return the jobject, that is, the reference to the
- *                                          entry in the Sirt.
+ *                                          entry in the Sirt (nullptr if necessary).
  *
  */
 template <class T> class BuildGenericJniFrameStateMachine {
@@ -948,12 +949,7 @@
   }
 
   void AdvanceSirt(mirror::Object* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uintptr_t sirtRef;
-    if (ptr != nullptr) {
-      sirtRef = PushSirt(ptr);
-    } else {
-      sirtRef = reinterpret_cast<uintptr_t>(nullptr);
-    }
+    uintptr_t sirtRef = PushSirt(ptr);
     if (HaveSirtGpr()) {
       gpr_index_--;
       PushGpr(sirtRef);
@@ -1155,49 +1151,49 @@
  public:
   ComputeGenericJniFrameSize() : num_sirt_references_(0), num_stack_entries_(0) {}
 
-  // (negative) offset from SP to top of Sirt.
-  uint32_t GetSirtOffset() {
-    return 8;
-  }
-
-  uint32_t GetFirstSirtEntryOffset() {
-    return GetSirtOffset() + sizeof(StackReference<mirror::Object>);
-  }
-
-  uint32_t GetNumSirtReferences() {
-    return num_sirt_references_;
-  }
-
   uint32_t GetStackSize() {
     return num_stack_entries_ * sizeof(uintptr_t);
   }
 
-  void ComputeLayout(bool is_static, const char* shorty, uint32_t shorty_len, void* sp,
-                     StackReference<mirror::Object>** start_sirt, StackIndirectReferenceTable** table,
-                     uint32_t* sirt_entries, uintptr_t** start_stack, uintptr_t** start_gpr,
-                     uint32_t** start_fpr, void** code_return, size_t* overall_size)
+  // WARNING: After this, *sp won't be pointing to the method anymore!
+  void ComputeLayout(mirror::ArtMethod*** m, bool is_static, const char* shorty, uint32_t shorty_len,
+                     void* sp, StackIndirectReferenceTable** table, uint32_t* sirt_entries,
+                     uintptr_t** start_stack, uintptr_t** start_gpr, uint32_t** start_fpr,
+                     void** code_return, size_t* overall_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ComputeAll(is_static, shorty, shorty_len);
 
+    mirror::ArtMethod* method = **m;
+
     uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
-    *start_sirt = reinterpret_cast<StackReference<mirror::Object>*>(sp8-GetFirstSirtEntryOffset());
 
-    // Add padding entries if necessary for alignment.
-    if (sizeof(uintptr_t) < sizeof(uint64_t)) {
-      uint32_t size = sizeof(uintptr_t) * num_sirt_references_;
-      uint32_t rem = size % 8;
-      if (rem != 0) {
-        DCHECK_EQ(rem, 4U);
-        num_sirt_references_++;
-      }
-    }
+    // First, fix up the layout of the callee-save frame.
+    // We have to squeeze in the Sirt, and relocate the method pointer.
+
+    // "Free" the slot for the method.
+    sp8 += kPointerSize;
+
+    // Add the Sirt.
     *sirt_entries = num_sirt_references_;
-    size_t sirt_size = StackIndirectReferenceTable::SizeOf(num_sirt_references_);
-    sp8 -= GetSirtOffset() + sirt_size;
+    size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSize(num_sirt_references_);
+    sp8 -= sirt_size;
     *table = reinterpret_cast<StackIndirectReferenceTable*>(sp8);
+    (*table)->SetNumberOfReferences(num_sirt_references_);
 
+    // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
+    sp8 -= kPointerSize;
+    uint8_t* method_pointer = sp8;
+    *(reinterpret_cast<mirror::ArtMethod**>(method_pointer)) = method;
+    *m = reinterpret_cast<mirror::ArtMethod**>(method_pointer);
+
+    // Reference cookie and padding
+    sp8 -= 8;
+    // Store Sirt size
+    *reinterpret_cast<uint32_t*>(sp8) = static_cast<uint32_t>(sirt_size & 0xFFFFFFFF);
+
+    // Next comes the native call stack.
     sp8 -= GetStackSize();
-    // Now align the call stack under the Sirt. This aligns by 16.
+    // Now align the call stack below. This aligns by 16, as AArch64 seems to require.
     uintptr_t mask = ~0x0F;
     sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask);
     *start_stack = reinterpret_cast<uintptr_t*>(sp8);
@@ -1212,10 +1208,14 @@
     *start_gpr = reinterpret_cast<uintptr_t*>(sp8);
 
     // reserve space for the code pointer
-    sp8 -= sizeof(void*);
+    sp8 -= kPointerSize;
     *code_return = reinterpret_cast<void*>(sp8);
 
     *overall_size = reinterpret_cast<uint8_t*>(sp) - sp8;
+
+    // The new SP is stored at the bottom of the alloca, so the assembly can pop it immediately.
+    sp8 = reinterpret_cast<uint8_t*>(sp) - 5 * KB;
+    *(reinterpret_cast<uint8_t**>(sp8)) = method_pointer;
   }
 
   void ComputeSirtOffset() { }  // nothing to do, static right now
@@ -1291,21 +1291,21 @@
 // of transitioning into native code.
 class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor {
  public:
-  BuildGenericJniFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
+  BuildGenericJniFrameVisitor(mirror::ArtMethod*** sp, bool is_static, const char* shorty,
                               uint32_t shorty_len, Thread* self) :
-      QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sm_(this) {
+      QuickArgumentVisitor(*sp, is_static, shorty, shorty_len), sm_(this) {
     ComputeGenericJniFrameSize fsc;
-    fsc.ComputeLayout(is_static, shorty, shorty_len, sp, &cur_sirt_entry_, &sirt_,
-                      &sirt_expected_refs_, &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_,
-                      &code_return_, &alloca_used_size_);
+    fsc.ComputeLayout(sp, is_static, shorty, shorty_len, *sp, &sirt_, &sirt_expected_refs_,
+                      &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_, &code_return_,
+                      &alloca_used_size_);
     sirt_number_of_references_ = 0;
-    top_of_sirt_ = cur_sirt_entry_;
+    cur_sirt_entry_ = reinterpret_cast<StackReference<mirror::Object>*>(GetFirstSirtEntry());
 
     // jni environment is always first argument
     sm_.AdvancePointer(self->GetJniEnv());
 
     if (is_static) {
-      sm_.AdvanceSirt((*sp)->GetDeclaringClass());
+      sm_.AdvanceSirt((**sp)->GetDeclaringClass());
     }
   }
 
@@ -1359,7 +1359,7 @@
     // Initialize padding entries.
     while (sirt_number_of_references_ < sirt_expected_refs_) {
       *cur_sirt_entry_ = StackReference<mirror::Object>();
-      cur_sirt_entry_--;
+      cur_sirt_entry_++;
       sirt_number_of_references_++;
     }
     sirt_->SetNumberOfReferences(sirt_expected_refs_);
@@ -1368,8 +1368,8 @@
     self->PushSirt(sirt_);
   }
 
-  jobject GetFirstSirtEntry() {
-    return reinterpret_cast<jobject>(top_of_sirt_);
+  jobject GetFirstSirtEntry() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<jobject>(sirt_->GetStackReference(0));
   }
 
   void PushGpr(uintptr_t val) {
@@ -1394,9 +1394,15 @@
   }
 
   uintptr_t PushSirt(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref);
-    uintptr_t tmp = reinterpret_cast<uintptr_t>(cur_sirt_entry_);
-    cur_sirt_entry_--;
+    uintptr_t tmp;
+    if (ref == nullptr) {
+      *cur_sirt_entry_ = StackReference<mirror::Object>();
+      tmp = reinterpret_cast<uintptr_t>(nullptr);
+    } else {
+      *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref);
+      tmp = reinterpret_cast<uintptr_t>(cur_sirt_entry_);
+    }
+    cur_sirt_entry_++;
     sirt_number_of_references_++;
     return tmp;
   }
@@ -1418,7 +1424,7 @@
   uintptr_t* cur_gpr_reg_;
   uint32_t* cur_fpr_reg_;
   uintptr_t* cur_stack_arg_;
-  StackReference<mirror::Object>* top_of_sirt_;
+  // StackReference<mirror::Object>* top_of_sirt_;
   void* code_return_;
   size_t alloca_used_size_;
 
@@ -1432,20 +1438,22 @@
  * Create a Sirt and call stack and fill a mini stack with values to be pushed to registers.
  * The final element on the stack is a pointer to the native code.
  *
+ * On entry, the stack has a standard callee-save frame above sp, and an alloca below it.
+ * We need to fix this, as the Sirt needs to go into the callee-save frame.
+ *
  * The return of this function denotes:
  * 1) How many bytes of the alloca can be released, if the value is non-negative.
  * 2) An error, if the value is negative.
  */
 extern "C" ssize_t artQuickGenericJniTrampoline(Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   mirror::ArtMethod* called = *sp;
-  DCHECK(called->IsNative());
+  DCHECK(called->IsNative()) << PrettyMethod(called, true);
 
   // run the visitor
   MethodHelper mh(called);
 
-  BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(),
+  BuildGenericJniFrameVisitor visitor(&sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(),
                                       self);
   visitor.VisitArguments();
   visitor.FinalizeSirt(self);
@@ -1462,11 +1470,14 @@
     if (self->IsExceptionPending()) {
       self->PopSirt();
       // A negative value denotes an error.
+      // TODO: Do we still need to fix the stack pointer? I think so. Then it's necessary to push
+      //       that value!
       return -1;
     }
   } else {
     cookie = JniMethodStart(self);
   }
+  uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   *(sp32 - 1) = cookie;
 
   // retrieve native code
@@ -1480,8 +1491,8 @@
   size_t window_size = visitor.GetAllocaUsedSize();
   *code_pointer = reinterpret_cast<uintptr_t>(nativeCode);
 
-  // 5K reserved, window_size used.
-  return (5 * KB) - window_size;
+  // 5K reserved, window_size + frame pointer used.
+  return (5 * KB) - window_size - kPointerSize;
 }
 
 /*
@@ -1501,10 +1512,10 @@
   if (return_shorty_char == 'L') {
     // the only special ending call
     if (called->IsSynchronized()) {
-      ComputeGenericJniFrameSize fsc;
-      fsc.ComputeSirtOffset();
-      uint32_t offset = fsc.GetFirstSirtEntryOffset();
-      jobject tmp = reinterpret_cast<jobject>(reinterpret_cast<uint8_t*>(sp) - offset);
+      StackIndirectReferenceTable* table =
+          reinterpret_cast<StackIndirectReferenceTable*>(
+              reinterpret_cast<uint8_t*>(sp) + kPointerSize);
+      jobject tmp = reinterpret_cast<jobject>(table->GetStackReference(0));
 
       return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceSynchronized(result.l, cookie, tmp,
                                                                               self));
@@ -1513,10 +1524,10 @@
     }
   } else {
     if (called->IsSynchronized()) {
-      ComputeGenericJniFrameSize fsc;
-      fsc.ComputeSirtOffset();
-      uint32_t offset = fsc.GetFirstSirtEntryOffset();
-      jobject tmp = reinterpret_cast<jobject>(reinterpret_cast<uint8_t*>(sp) - offset);
+      StackIndirectReferenceTable* table =
+          reinterpret_cast<StackIndirectReferenceTable*>(
+              reinterpret_cast<uint8_t*>(sp) + kPointerSize);
+      jobject tmp = reinterpret_cast<jobject>(table->GetStackReference(0));
 
       JniMethodEndSynchronized(cookie, tmp, self);
     } else {
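
With the contract change above, PushSirt must now accept null references and hand back a null jobject for them while still consuming a SIRT slot. A rough sketch of a delegate honoring that contract (only PushStack and PushSirt are quoted in this diff; the remaining member names are assumptions modeled on the visitor, and mirror::Object* is replaced by void* to keep the sketch standalone):

    #include <cstdint>
    #include <vector>

    class CountingJniFrameDelegate {
     public:
      void PushGpr(uintptr_t /*val*/) { gprs_++; }
      void PushFpr4(float /*val*/) { fprs_++; }
      void PushFpr8(uint64_t /*val*/) { fprs_++; }
      void PushStack(uintptr_t /*val*/) { stack_entries_++; }

      // Every reference, null or not, occupies a SIRT entry; a null reference must still be
      // passed to the callee as a null jobject, so return 0 instead of the slot's address.
      uintptr_t PushSirt(void* ref) {
        sirt_slots_.push_back(ref);
        return (ref == nullptr) ? 0 : reinterpret_cast<uintptr_t>(&sirt_slots_.back());
      }

     private:
      uint32_t gprs_ = 0;
      uint32_t fprs_ = 0;
      uint32_t stack_entries_ = 0;
      std::vector<void*> sirt_slots_;
    };
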
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 6b897cb..fe27992 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -320,15 +320,6 @@
   self->PopManagedStackFragment(fragment);
 }
 
-#ifndef NDEBUG
-size_t ArtMethod::GetSirtOffsetInBytes() {
-  CHECK(IsNative());
-  // TODO: support Sirt access from generic JNI trampoline.
-  CHECK_NE(GetEntryPointFromQuickCompiledCode(), GetQuickGenericJniTrampoline());
-  return kPointerSize;
-}
-#endif
-
 bool ArtMethod::IsRegistered() {
   void* native_method =
       GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_), false);
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 8c22e67..a9da66c 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -342,13 +342,9 @@
     return GetFrameSizeInBytes() - kPointerSize;
   }
 
-#ifndef NDEBUG
-  size_t GetSirtOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-#else
   size_t GetSirtOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return kPointerSize;
   }
-#endif
 
   bool IsRegistered();
 
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index dd2bd4f..5c79a71 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -394,6 +394,20 @@
     return shorty_len_;
   }
 
+  // Counts the number of references in the parameter list of the corresponding method.
+  // Note: This does _not_ include the "this" receiver for non-static methods.
+  uint32_t GetNumberOfReferenceArgsWithoutReceiver() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const char* shorty = GetShorty();
+    uint32_t refs = 0;
+    for (uint32_t i = 1; i < shorty_len_; ++i) {
+      if (shorty[i] == 'L') {
+        refs++;
+      }
+    }
+
+    return refs;
+  }
+
   const Signature GetSignature() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 15b288e..26b4de3 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -108,17 +108,11 @@
     return NULL;
   } else if (m->IsNative()) {
     if (cur_quick_frame_ != NULL) {
-      if (m->GetEntryPointFromQuickCompiledCode() == GetQuickGenericJniTrampoline()) {
-        UNIMPLEMENTED(ERROR) << "Failed to determine this object of native method: "
-            << PrettyMethod(m);
-        return nullptr;
-      } else {
-        StackIndirectReferenceTable* sirt =
-            reinterpret_cast<StackIndirectReferenceTable*>(
-                reinterpret_cast<char*>(cur_quick_frame_) +
-                m->GetSirtOffsetInBytes());
-        return sirt->GetReference(0);
-      }
+      StackIndirectReferenceTable* sirt =
+          reinterpret_cast<StackIndirectReferenceTable*>(
+              reinterpret_cast<char*>(cur_quick_frame_) +
+              m->GetSirtOffsetInBytes());
+      return sirt->GetReference(0);
     } else {
       return cur_shadow_frame_->GetVRegReference(0);
     }
diff --git a/runtime/stack_indirect_reference_table.h b/runtime/stack_indirect_reference_table.h
index c2d6a59..e6dda85 100644
--- a/runtime/stack_indirect_reference_table.h
+++ b/runtime/stack_indirect_reference_table.h
@@ -39,7 +39,7 @@
 
   ~StackIndirectReferenceTable() {}
 
-  // Number of references contained within this SIRT
+  // Number of references contained within this SIRT.
   uint32_t NumberOfReferences() const {
     return number_of_references_;
   }
@@ -51,7 +51,13 @@
     return header_size + data_size;
   }
 
-  // Link to previous SIRT or NULL
+  // Get the size of the SIRT for the number of entries, with padding added for potential alignment.
+  static size_t GetAlignedSirtSize(uint32_t num_references) {
+    size_t sirt_size = SizeOf(num_references);
+    return RoundUp(sirt_size, 8);
+  }
+
+  // Link to previous SIRT or NULL.
   StackIndirectReferenceTable* GetLink() const {
     return link_;
   }
@@ -72,6 +78,12 @@
     return references_[i].AsMirrorPtr();
   }
 
+  StackReference<mirror::Object>* GetStackReference(size_t i)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK_LT(i, number_of_references_);
+    return &references_[i];
+  }
+
   void SetReference(size_t i, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, number_of_references_);
     references_[i].Assign(object);
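
GetAlignedSirtSize() exists because the raw SIRT size is only guaranteed to be 4-byte aligned on 32-bit targets, while the frame regions placed below it assume 8-byte alignment. A sketch of the rounding with illustrative (not authoritative) 32-bit sizes:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical 32-bit layout: 4-byte link_, 4-byte number_of_references_, 4-byte entries.
    static size_t RawSirtSize32(uint32_t refs) {
      return 4 + 4 + refs * 4u;
    }

    static size_t AlignedSirtSize32(uint32_t refs) {
      const size_t size = RawSirtSize32(refs);
      return (size + 7) & ~static_cast<size_t>(7);  // RoundUp(size, 8)
    }

    // RawSirtSize32(3) == 20, AlignedSirtSize32(3) == 24: one 4-byte pad keeps everything
    // below the SIRT 8-byte aligned.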