Implement direct apk -> boot calling

Also sharpen super calls.

Change-Id: Ie4d3ab2cbf2961a06ec86762a53132f49a4ed922
diff --git a/src/compiler.cc b/src/compiler.cc
index f64b1cb..5156122 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -71,12 +71,14 @@
      strings_in_dex_cache_(0), strings_not_in_dex_cache_(0),
      resolved_types_(0), unresolved_types_(0),
      resolved_instance_fields_(0), unresolved_instance_fields_(0),
-     resolved_local_static_fields_(0), resolved_static_fields_(0), unresolved_static_fields_(0),
-     virtual_made_direct_(0) {
-    for (size_t i = 0; i < kMaxInvokeType; i++) {
+     resolved_local_static_fields_(0), resolved_static_fields_(0), unresolved_static_fields_(0) {
+    for (size_t i = 0; i <= kMaxInvokeType; i++) {
       resolved_methods_[i] = 0;
       unresolved_methods_[i] = 0;
-    }
+      virtual_made_direct_[i] = 0;
+      direct_calls_to_boot_[i] = 0;
+      direct_methods_to_boot_[i] = 0;
+   }
   }
 
   void Dump() {
@@ -89,13 +91,32 @@
     DumpStat(resolved_local_static_fields_, resolved_static_fields_ + unresolved_static_fields_,
              "static fields local to a class");
 
-    for (size_t i = 0; i < kMaxInvokeType; i++) {
+    for (size_t i = 0; i <= kMaxInvokeType; i++) {
       std::ostringstream oss;
-      oss << "resolved " << static_cast<InvokeType>(i) << " methods";
+      oss << static_cast<InvokeType>(i) << " methods were AOT resolved";
       DumpStat(resolved_methods_[i], unresolved_methods_[i], oss.str().c_str());
+      if (virtual_made_direct_[i] > 0) {
+        std::ostringstream oss2;
+        oss2 << static_cast<InvokeType>(i) << " methods made direct";
+        DumpStat(virtual_made_direct_[i],
+                 resolved_methods_[i] + unresolved_methods_[i] - virtual_made_direct_[i],
+                 oss2.str().c_str());
+      }
+      if (direct_calls_to_boot_[i] > 0) {
+        std::ostringstream oss2;
+        oss2 << static_cast<InvokeType>(i) << " method calls are direct into boot";
+        DumpStat(direct_calls_to_boot_[i],
+                 resolved_methods_[i] + unresolved_methods_[i] - direct_calls_to_boot_[i],
+                 oss2.str().c_str());
+      }
+      if (direct_methods_to_boot_[i] > 0) {
+        std::ostringstream oss2;
+        oss2 << static_cast<InvokeType>(i) << " method calls have methods in boot";
+        DumpStat(direct_methods_to_boot_[i],
+                 resolved_methods_[i] + unresolved_methods_[i] - direct_methods_to_boot_[i],
+                 oss2.str().c_str());
+      }
     }
-    DumpStat(virtual_made_direct_, resolved_methods_[kVirtual] + unresolved_methods_[kVirtual],
-             "made direct from virtual");
   }
 
 // Allow lossy statistics in non-debug builds
@@ -172,10 +193,24 @@
     unresolved_methods_[type]++;
   }
 
-  void VirtualMadeDirect() {
+  void VirtualMadeDirect(InvokeType type) {
+    DCHECK_LE(type, kMaxInvokeType);
     STATS_LOCK();
-    virtual_made_direct_++;
+    virtual_made_direct_[type]++;
   }
+
+  void DirectCallsToBoot(InvokeType type) {
+    DCHECK_LE(type, kMaxInvokeType);
+    STATS_LOCK();
+    direct_calls_to_boot_[type]++;
+  }
+
+  void DirectMethodsToBoot(InvokeType type) {
+    DCHECK_LE(type, kMaxInvokeType);
+    STATS_LOCK();
+    direct_methods_to_boot_[type]++;
+  }
+
  private:
   Mutex stats_lock_;
 
@@ -197,7 +232,9 @@
 
   size_t resolved_methods_[kMaxInvokeType + 1];
   size_t unresolved_methods_[kMaxInvokeType + 1];
-  size_t virtual_made_direct_;
+  size_t virtual_made_direct_[kMaxInvokeType + 1];
+  size_t direct_calls_to_boot_[kMaxInvokeType + 1];
+  size_t direct_methods_to_boot_[kMaxInvokeType + 1];
 
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);;
 };
@@ -652,9 +689,39 @@
   return false;  // Incomplete knowledge needs slow path.
 }
 
+void Compiler::GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type, Method* method,
+                                             uintptr_t& direct_code, uintptr_t& direct_method) {
+  direct_code = 0;
+  direct_method = 0;
+  if (sharp_type != kStatic && sharp_type != kDirect) {
+    return;
+  }
+  bool compiling_boot = Runtime::Current()->GetHeap()->GetSpaces().size() == 1;
+  if (compiling_boot) {
+    return;
+  }
+  bool method_code_in_boot = method->GetDeclaringClass()->GetClassLoader() == NULL;
+  if (!method_code_in_boot) {
+    return;
+  }
+  bool has_clinit_trampoline = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized();
+  if (has_clinit_trampoline) {
+    return;
+  }
+  stats_->DirectCallsToBoot(type);
+  stats_->DirectMethodsToBoot(type);
+  if (Runtime::Current()->GetHeap()->GetImageSpace()->Contains(method)) {
+    direct_method = reinterpret_cast<uintptr_t>(method);
+  }
+  direct_code = reinterpret_cast<uintptr_t>(method->GetCode());
+}
+
 bool Compiler::ComputeInvokeInfo(uint32_t method_idx, OatCompilationUnit* mUnit, InvokeType& type,
-                                 int& vtable_idx) {
+                                 int& vtable_idx, uintptr_t& direct_code,
+                                 uintptr_t& direct_method) {
   vtable_idx = -1;
+  direct_code = 0;
+  direct_method = 0;
   Method* resolved_method = ComputeReferrerMethod(mUnit, method_idx);
   if (resolved_method != NULL) {
     Class* referrer_class = ComputeReferrerClass(mUnit);
@@ -678,27 +745,33 @@
                                           resolved_method->GetAccessFlags())) {
         vtable_idx = resolved_method->GetMethodIndex();
         const bool kEnableSharpening = true;
-        if (kEnableSharpening && type == kVirtual &&
-            (resolved_method->IsFinal() || methods_class->IsFinal())) {
-          stats_->ResolvedMethod(kVirtual);
+        // Sharpen a virtual call into a direct call when the target is known.
+        bool can_sharpen = type == kVirtual && (resolved_method->IsFinal() ||
+                                                methods_class->IsFinal());
+        // Also sharpen a super call when its vtable index is valid in the super class's vtable.
+        can_sharpen = can_sharpen || (type == kSuper &&
+                                      referrer_class->IsSubClass(methods_class) &&
+                                      vtable_idx < methods_class->GetVTable()->GetLength());
+        if (kEnableSharpening && can_sharpen) {
+          stats_->ResolvedMethod(type);
           // Sharpen a virtual call into a direct call. The method_idx is into referrer's
           // dex cache, check that this resolved method is where we expect it.
           CHECK(referrer_class->GetDexCache()->GetResolvedMethod(method_idx) == resolved_method)
             << PrettyMethod(resolved_method);
-          type = kDirect;
-          stats_->VirtualMadeDirect();
-          return true;
-        } else if (type != kSuper) {
-          // nothing left to do for static/direct/virtual/interface dispatch
-          stats_->ResolvedMethod(type);
-          return true;
-        } else {
-          // ensure the vtable index will be correct to dispatch in the vtable of the super class
-          if (referrer_class->IsSubClass(methods_class) &&
-              vtable_idx < methods_class->GetVTable()->GetLength()) {
-            stats_->ResolvedMethod(type);
-            return true;
+          if (type == kSuper) {
+            CHECK(methods_class->GetVTable()->Get(vtable_idx) == resolved_method)
+              << PrettyMethod(resolved_method);
           }
+          stats_->VirtualMadeDirect(type);
+          GetCodeAndMethodForDirectCall(type, kDirect, resolved_method, direct_code, direct_method);
+          type = kDirect;
+          return true;
+        } else if (type == kSuper) {
+          // Unsharpened super calls are suspicious so go slowpath.
+        } else {
+          stats_->ResolvedMethod(type);
+          GetCodeAndMethodForDirectCall(type, type, resolved_method, direct_code, direct_method);
+          return true;
         }
       }
     }
diff --git a/src/compiler.h b/src/compiler.h
index aa32854..f5f5f9f 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -115,7 +115,7 @@
 
   // Can we fastpath a interface, super class or virtual method call? Computes method's vtable index
   bool ComputeInvokeInfo(uint32_t method_idx, OatCompilationUnit* mUnit, InvokeType& type,
-                         int& vtable_idx);
+                         int& vtable_idx, uintptr_t& direct_code, uintptr_t& direct_method);
 
 #if defined(ART_USE_LLVM_COMPILER)
   void SetElfFileName(std::string const& filename);
@@ -134,6 +134,10 @@
 
  private:
 
+  // Compute constant code and method pointers when possible
+  void GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type,Method* method,
+                                     uintptr_t& direct_code, uintptr_t& direct_method);
+
   // Checks if class specified by type_idx is one of the image_classes_
   bool IsImageClass(const std::string& descriptor) const;
 
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index d19c1f9..401eeb6 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -24,7 +24,8 @@
 
 
 typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
-                            uint32_t methodIdx);
+                            uint32_t methodIdx, uintptr_t directCode,
+                            uintptr_t directMethod);
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform intial
@@ -98,9 +99,20 @@
  * emit the next instruction in static & direct invoke sequences.
  */
 int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
-                   int state, uint32_t dexIdx, uint32_t unused)
+                   int state, uint32_t dexIdx, uint32_t unused,
+                   uintptr_t directCode, uintptr_t directMethod)
 {
-    switch(state) {
+    if (directCode != 0 && directMethod != 0) {
+        switch(state) {
+        case 0:  // Load known code & Method* [sets rINVOKE_TGT, rARG0]
+            loadConstant(cUnit, rINVOKE_TGT, directCode);
+            loadConstant(cUnit, rARG0, directMethod);
+            break;
+        default:
+            return -1;
+      }
+    } else {
+        switch(state) {
         case 0:  // Get the current Method* [sets rARG0]
             loadCurrMethodDirect(cUnit, rARG0);
             break;
@@ -108,6 +120,10 @@
             loadWordDisp(cUnit, rARG0,
                 Method::DexCacheResolvedMethodsOffset().Int32Value(),
                 rARG0);
+            // Set up direct code if known.
+            if (directCode != 0) {
+                loadConstant(cUnit, rINVOKE_TGT, directCode);
+            }
             break;
         case 2:  // Grab target method*
             loadWordDisp(cUnit, rARG0,
@@ -116,12 +132,15 @@
             break;
 #if !defined(TARGET_X86)
         case 3:  // Grab the code from the method*
-            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
-                         rINVOKE_TGT);
+            if (directCode == 0) {
+                loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
+                             rINVOKE_TGT);
+            }
             break;
 #endif
         default:
             return -1;
+        }
     }
     return state + 1;
 }
@@ -134,7 +153,8 @@
  * rARG1 here rather than the standard loadArgRegs.
  */
 int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
-                  int state, uint32_t dexIdx, uint32_t methodIdx)
+                  int state, uint32_t dexIdx, uint32_t methodIdx,
+                  uintptr_t unused, uintptr_t unused2)
 {
     RegLocation rlArg;
     /*
@@ -173,58 +193,6 @@
     return state + 1;
 }
 
-/*
- * Interleave launch code for INVOKE_SUPER.  See comments
- * for nextVCallIns.
- */
-int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
-                      int state, uint32_t dexIdx, uint32_t methodIdx)
-{
-    /*
-     * This is the fast path in which the target virtual method is
-     * fully resolved at compile time.  Note also that this path assumes
-     * that the check to verify that the target method index falls
-     * within the size of the super's vtable has been done at compile-time.
-     */
-    RegLocation rlArg;
-    switch(state) {
-        case 0: // Get current Method* [set rARG0]
-            loadCurrMethodDirect(cUnit, rARG0);
-            // Load "this" [set rARG1]
-            rlArg = oatGetSrc(cUnit, mir, 0);
-            loadValueDirectFixed(cUnit, rlArg, rARG1);
-            // Get method->declaring_class_ [use rARG0, set rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG0,
-                         Method::DeclaringClassOffset().Int32Value(),
-                         rINVOKE_TGT);
-            // Is "this" null? [use rARG1]
-            genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
-            break;
-        case 1: // method->declaring_class_->super_class [use/set rINVOKE_TGT]
-            loadWordDisp(cUnit, rINVOKE_TGT,
-                         Class::SuperClassOffset().Int32Value(), rINVOKE_TGT);
-            break;
-        case 2: // Get ...->super_class_->vtable [u/s rINVOKE_TGT]
-            loadWordDisp(cUnit, rINVOKE_TGT,
-                         Class::VTableOffset().Int32Value(), rINVOKE_TGT);
-            break;
-        case 3: // Get target method [use rINVOKE_TGT, set rARG0]
-            loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
-                         Array::DataOffset(sizeof(Object*)).Int32Value(),
-                         rARG0);
-            break;
-#if !defined(TARGET_X86)
-        case 4: // target compiled code address [uses rARG0, sets rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-#endif
-        default:
-            return -1;
-    }
-    return state + 1;
-}
-
 int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
 {
@@ -245,28 +213,32 @@
 }
 
 int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
-                         int state, uint32_t dexIdx, uint32_t methodIdx)
+                         int state, uint32_t dexIdx, uint32_t methodIdx,
+                         uintptr_t unused, uintptr_t unused2)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
 }
 
 int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                         uint32_t dexIdx, uint32_t methodIdx)
+                         uint32_t dexIdx, uint32_t methodIdx, uintptr_t unused,
+                         uintptr_t unused2)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
 }
 
 int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                        uint32_t dexIdx, uint32_t methodIdx)
+                        uint32_t dexIdx, uint32_t methodIdx, uintptr_t unused,
+                        uintptr_t unused2)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
 }
 
 int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                    uint32_t dexIdx, uint32_t methodIdx)
+                    uint32_t dexIdx, uint32_t methodIdx, uintptr_t unused,
+                    uintptr_t unused2)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
@@ -277,7 +249,8 @@
  * which will locate the target and continue on via a tail call.
  */
 int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
-                          uint32_t dexIdx, uint32_t unused)
+                          uint32_t dexIdx, uint32_t unused, uintptr_t unused2,
+                          uintptr_t unused3)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
@@ -285,7 +258,8 @@
 
 int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
                                          int state, uint32_t dexIdx,
-                                         uint32_t unused)
+                                         uint32_t unused, uintptr_t unused2,
+                                         uintptr_t unused3)
 {
   int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
   return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
@@ -293,7 +267,8 @@
 
 int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
                 int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
-                uint32_t methodIdx, bool skipThis)
+                uint32_t methodIdx, uintptr_t directCode,
+                uintptr_t directMethod, bool skipThis)
 {
 #if !defined(TARGET_X86)
     int lastArgReg = rARG3;
@@ -317,7 +292,8 @@
             rlArg.wide = false;
             loadValueDirectFixed(cUnit, rlArg, nextReg);
         }
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                 directCode, directMethod);
     }
     return callState;
 }
@@ -332,7 +308,9 @@
 int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
                          DecodedInstruction* dInsn, int callState,
                          LIR** pcrLabel, NextCallInsn nextCallInsn,
-                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
+                         uint32_t dexIdx, uint32_t methodIdx,
+                         uintptr_t directCode, uintptr_t directMethod,
+                         bool skipThis)
 {
     RegLocation rlArg;
 
@@ -340,7 +318,8 @@
     if (dInsn->vA == 0)
         return callState;
 
-    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                             directCode, directMethod);
 
     DCHECK_LE(dInsn->vA, 5U);
     if (dInsn->vA > 3) {
@@ -366,11 +345,12 @@
                 loadWordDisp(cUnit, rSP,
                              oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                 callState = nextCallInsn(cUnit, mir, callState, dexIdx,
-                                         methodIdx);
+                                         methodIdx, directCode, directMethod);
             }
             storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
             storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord);
-            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                     directCode, directMethod);
             nextUse++;
         }
         // Loop through the rest
@@ -395,7 +375,7 @@
                     loadValueDirectFixed(cUnit, rlArg, lowReg);
                 }
                 callState = nextCallInsn(cUnit, mir, callState, dexIdx,
-                                         methodIdx);
+                                         methodIdx, directCode, directMethod);
             }
             int outsOffset = (nextUse + 1) * 4;
             if (rlArg.wide) {
@@ -405,12 +385,14 @@
                 storeWordDisp(cUnit, rSP, outsOffset, lowReg);
                 nextUse++;
             }
-            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                     directCode, directMethod);
         }
     }
 
     callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
-                            dexIdx, methodIdx, skipThis);
+                            dexIdx, methodIdx, directCode, directMethod,
+                            skipThis);
 
     if (pcrLabel) {
         *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
@@ -436,7 +418,9 @@
 int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
                        DecodedInstruction* dInsn, int callState,
                        LIR** pcrLabel, NextCallInsn nextCallInsn,
-                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
+                       uint32_t dexIdx, uint32_t methodIdx,
+                       uintptr_t directCode, uintptr_t directMethod,
+                       bool skipThis)
 {
     int firstArg = dInsn->vC;
     int numArgs = dInsn->vA;
@@ -445,7 +429,7 @@
     if (numArgs <= 5)
         return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
                                     nextCallInsn, dexIdx, methodIdx,
-                                    skipThis);
+                                    directCode, directMethod, skipThis);
     /*
      * Make sure range list doesn't span the break between in normal
      * Dalvik vRegs and the ins.
@@ -516,26 +500,33 @@
     } else {
         // Use vldm/vstm pair using rARG3 as a temp
         int regsLeft = std::min(numArgs - 3, 16);
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                 directCode, directMethod);
         opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
         LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
         //TUNING: loosen barrier
         ld->defMask = ENCODE_ALL;
         setMemRefType(ld, true /* isLoad */, kDalvikReg);
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                 directCode, directMethod);
         opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                 directCode, directMethod);
         LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
         setMemRefType(st, false /* isLoad */, kDalvikReg);
         st->defMask = ENCODE_ALL;
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                                 directCode, directMethod);
+
     }
 #endif
 
     callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
-                            dexIdx, methodIdx, skipThis);
+                            dexIdx, methodIdx, directCode, directMethod,
+                            skipThis);
 
-    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx,
+                             directCode, directMethod);
     if (pcrLabel) {
         *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
     }
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index be8f1f9..64f55c6 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -70,10 +70,13 @@
 
     uint32_t dexMethodIdx = dInsn->vB;
     int vtableIdx;
+    uintptr_t directCode;
+    uintptr_t directMethod;
     bool skipThis;
     bool fastPath =
         cUnit->compiler->ComputeInvokeInfo(dexMethodIdx, &mUnit, type,
-                                           vtableIdx)
+                                           vtableIdx, directCode,
+                                           directMethod)
         && !SLOW_INVOKE_PATH;
     if (type == kInterface) {
       nextCallInsn = fastPath ? nextInterfaceCallInsn
@@ -89,8 +92,9 @@
       nextCallInsn = fastPath ? nextSDCallInsn : nextStaticCallInsnSP;
       skipThis = false;
     } else if (type == kSuper) {
-      nextCallInsn = fastPath ? nextSuperCallInsn : nextSuperCallInsnSP;
-      skipThis = fastPath;
+      DCHECK(!fastPath);  // Fast path is a direct call.
+      nextCallInsn = nextSuperCallInsnSP;
+      skipThis = false;
     } else {
       DCHECK_EQ(type, kVirtual);
       nextCallInsn = fastPath ? nextVCallInsn : nextVCallInsnSP;
@@ -99,16 +103,16 @@
     if (!isRange) {
         callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pNullCk,
                                          nextCallInsn, dexMethodIdx,
-                                         vtableIdx, skipThis);
+                                         vtableIdx, directCode, directMethod, skipThis);
     } else {
         callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, pNullCk,
                                        nextCallInsn, dexMethodIdx, vtableIdx,
-                                       skipThis);
+                                       directCode, directMethod, skipThis);
     }
     // Finish up any of the call sequence not interleaved in arg loading
     while (callState >= 0) {
         callState = nextCallInsn(cUnit, mir, callState, dexMethodIdx,
-                                 vtableIdx);
+                                 vtableIdx, directCode, directMethod);
     }
     if (DISPLAY_MISSING_TARGETS) {
         genShowTarget(cUnit);
diff --git a/src/runtime_support.cc b/src/runtime_support.cc
index c61db20..7838d94 100644
--- a/src/runtime_support.cc
+++ b/src/runtime_support.cc
@@ -431,11 +431,11 @@
     is_static = (instr_code == Instruction::INVOKE_STATIC) ||
                 (instr_code == Instruction::INVOKE_STATIC_RANGE);
     is_virtual = (instr_code == Instruction::INVOKE_VIRTUAL) ||
-                 (instr_code == Instruction::INVOKE_VIRTUAL_RANGE);
-    DCHECK(is_static || (instr_code == Instruction::INVOKE_DIRECT) ||
-           (instr_code == Instruction::INVOKE_DIRECT_RANGE) ||
-           (instr_code == Instruction::INVOKE_VIRTUAL) ||
-           (instr_code == Instruction::INVOKE_VIRTUAL_RANGE));
+                 (instr_code == Instruction::INVOKE_VIRTUAL_RANGE) ||
+                 (instr_code == Instruction::INVOKE_SUPER) ||
+                 (instr_code == Instruction::INVOKE_SUPER_RANGE);
+    DCHECK(is_static || is_virtual || (instr_code == Instruction::INVOKE_DIRECT) ||
+           (instr_code == Instruction::INVOKE_DIRECT_RANGE));
     DecodedInstruction dec_insn(instr);
     dex_method_idx = dec_insn.vB;
     shorty = linker->MethodShorty(dex_method_idx, caller, &shorty_len);
@@ -505,9 +505,14 @@
       if (LIKELY(called_class->IsInitialized())) {
         code = called->GetCode();
       } else if (called_class->IsInitializing()) {
-        // Class is still initializing, go to oat and grab code (trampoline must be left in place
-        // until class is initialized to stop races between threads).
-        code = linker->GetOatCodeFor(called);
+        if (is_static) {
+          // Class is still initializing, go to oat and grab code (trampoline must be left in place
+          // until class is initialized to stop races between threads).
+          code = linker->GetOatCodeFor(called);
+        } else {
+          // No trampoline for non-static methods.
+          code = called->GetCode();
+        }
       } else {
         DCHECK(called_class->IsErroneous());
       }