Initial ARM JNI compiler support.

Change-Id: I85183eec9a2645e6cb074b4b18bc6af800a77e06
diff --git a/src/assembler.cc b/src/assembler.cc
index 55c6d07..62f11a3 100644
--- a/src/assembler.cc
+++ b/src/assembler.cc
@@ -83,7 +83,8 @@
   // Copy the instructions from the buffer.
   MemoryRegion from(reinterpret_cast<void*>(contents()), Size());
   instructions.CopyFrom(0, from);
-
+  // Flush instruction cache
+  __builtin___clear_cache(instructions.start(), instructions.end());
   // Process fixups in the instructions.
   ProcessFixups(instructions);
 #if defined(DEBUG)
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index 900d8d4..a728239 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -3,6 +3,7 @@
 #include "assembler.h"
 #include "logging.h"
 #include "offsets.h"
+#include "thread.h"
 #include "utils.h"
 
 namespace art {
@@ -241,7 +242,6 @@
   }
 }
 
-
 void Assembler::and_(Register rd, Register rn, ShifterOperand so,
                      Condition cond) {
   EmitType01(cond, so.type(), AND, 0, rn, rd, so);
@@ -1259,6 +1259,39 @@
   }
 }
 
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
+void Assembler::LoadSFromOffset(SRegister reg,
+                                Register base,
+                                int32_t offset,
+                                Condition cond) {
+  if (!Address::CanHoldLoadOffset(kLoadSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffset(kLoadSWord, offset));
+  vldrs(reg, Address(base, offset), cond);
+}
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
+void Assembler::LoadDFromOffset(DRegister reg,
+                                Register base,
+                                int32_t offset,
+                                Condition cond) {
+  if (!Address::CanHoldLoadOffset(kLoadDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffset(kLoadDWord, offset));
+  vldrd(reg, Address(base, offset), cond);
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldStoreOffset.
@@ -1294,19 +1327,53 @@
   }
 }
 
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreToOffset.
+void Assembler::StoreSToOffset(SRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond) {
+  if (!Address::CanHoldStoreOffset(kStoreSWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffset(kStoreSWord, offset));
+  vstrs(reg, Address(base, offset), cond);
+}
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreToOffset.
+void Assembler::StoreDToOffset(DRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond) {
+  if (!Address::CanHoldStoreOffset(kStoreDWord, offset)) {
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffset(kStoreDWord, offset));
+  vstrd(reg, Address(base, offset), cond);
+}
+
 // Emit code that will create an activation on the stack
 void Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg) {
   CHECK(IsAligned(frame_size, 16));
   // TODO: use stm/ldm
-  StoreToOffset(kStoreWord, LR, SP, 0);
-  StoreToOffset(kStoreWord, method_reg.AsCoreRegister(), SP, -4);
   AddConstant(SP, -frame_size);
+  StoreToOffset(kStoreWord, LR, SP, frame_size - 4);
+  StoreToOffset(kStoreWord, method_reg.AsCoreRegister(), SP, 0);
 }
 
 // Emit code that will remove an activation from the stack
 void Assembler::RemoveFrame(size_t frame_size) {
   CHECK(IsAligned(frame_size, 16));
-  LoadFromOffset(kLoadWord, LR, SP, 0);
+  LoadFromOffset(kLoadWord, LR, SP, frame_size - 4);
   AddConstant(SP, frame_size);
   mov(PC, ShifterOperand(LR));
 }
@@ -1323,12 +1390,21 @@
 
 // Store bytes from the given register onto the stack
 void Assembler::Store(FrameOffset dest, ManagedRegister src, size_t size) {
-  if (src.IsCoreRegister()) {
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCoreRegister()) {
     CHECK_EQ(4u, size);
     StoreToOffset(kStoreWord, src.AsCoreRegister(), SP, dest.Int32Value());
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    StoreToOffset(kStoreWord, src.AsRegisterPairLow(), SP, dest.Int32Value());
+    StoreToOffset(kStoreWord, src.AsRegisterPairHigh(),
+                  SP, dest.Int32Value() + 4);
+  } else if (src.IsSRegister()) {
+    StoreSToOffset(src.AsSRegister(), SP, dest.Int32Value());
   } else {
-    // VFP
-    LOG(FATAL) << "TODO";
+    CHECK(src.IsDRegister());
+    StoreDToOffset(src.AsDRegister(), SP, dest.Int32Value());
   }
 }
 
@@ -1370,12 +1446,21 @@
 }
 
 void Assembler::Load(ManagedRegister dest, FrameOffset src, size_t size) {
-  if (dest.IsCoreRegister()) {
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCoreRegister()) {
     CHECK_EQ(4u, size);
     LoadFromOffset(kLoadWord, dest.AsCoreRegister(), SP, src.Int32Value());
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    LoadFromOffset(kLoadWord, dest.AsRegisterPairLow(), SP, src.Int32Value());
+    LoadFromOffset(kLoadWord, dest.AsRegisterPairHigh(),
+                   SP, src.Int32Value() + 4);
+  } else if (dest.IsSRegister()) {
+    LoadSFromOffset(dest.AsSRegister(), SP, src.Int32Value());
   } else {
-    // TODO: VFP
-    LOG(FATAL) << "Unimplemented";
+    CHECK(dest.IsDRegister());
+    LoadDFromOffset(dest.AsDRegister(), SP, src.Int32Value());
   }
 }
 
@@ -1386,7 +1471,7 @@
 }
 
 void Assembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
-                          ManagedRegister scratch) {
+                                     ManagedRegister scratch) {
   CHECK(scratch.IsCoreRegister());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  TR, thr_offs.Int32Value());
@@ -1395,7 +1480,7 @@
 }
 
 void Assembler::CopyRawPtrToThread(ThreadOffset thr_offs, FrameOffset fr_offs,
-                        ManagedRegister scratch) {
+                                   ManagedRegister scratch) {
   CHECK(scratch.IsCoreRegister());
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  SP, fr_offs.Int32Value());
@@ -1417,12 +1502,14 @@
 }
 
 void Assembler::Move(ManagedRegister dest, ManagedRegister src) {
-  if (dest.IsCoreRegister()) {
-    CHECK(src.IsCoreRegister());
-    mov(dest.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
-  } else {
-    // TODO: VFP
-    LOG(FATAL) << "Unimplemented";
+  if (!dest.Equals(src)) {
+    if (dest.IsCoreRegister()) {
+      CHECK(src.IsCoreRegister());
+      mov(dest.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
+    } else {
+      // TODO: VFP
+      LOG(FATAL) << "Unimplemented";
+    }
   }
 }
 
@@ -1443,12 +1530,17 @@
 void Assembler::CreateStackHandle(ManagedRegister out_reg,
                                   FrameOffset handle_offset,
                                   ManagedRegister in_reg, bool null_allowed) {
-  CHECK(in_reg.IsCoreRegister());
+  CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister());
   CHECK(out_reg.IsCoreRegister());
   if (null_allowed) {
     // Null values get a handle value of 0.  Otherwise, the handle value is
     // the address in the stack handle block holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
+    if (in_reg.IsNoRegister()) {
+      LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
+                     SP, handle_offset.Int32Value());
+      in_reg = out_reg;
+    }
     cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
     if (!out_reg.Equals(in_reg)) {
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
@@ -1508,19 +1600,73 @@
   // TODO: place reference map on call
 }
 
+void Assembler::Call(FrameOffset base, Offset offset,
+                     ManagedRegister scratch) {
+  CHECK(scratch.IsCoreRegister());
+  // Call *(*(SP + base) + offset)
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
+                 SP, base.Int32Value());
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
+                 scratch.AsCoreRegister(), offset.Int32Value());
+  blx(scratch.AsCoreRegister());
+  // TODO: place reference map on call
+}
+
 // Generate code to check if Thread::Current()->suspend_count_ is non-zero
 // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
 // at the next instruction.
 void Assembler::SuspendPoll(ManagedRegister scratch, ManagedRegister return_reg,
                             FrameOffset return_save_location,
                             size_t return_size) {
-  LOG(WARNING) << "Unimplemented: Suspend poll";
+  SuspendCountSlowPath* slow = new SuspendCountSlowPath(return_reg,
+                                                        return_save_location,
+                                                        return_size);
+  buffer_.EnqueueSlowPath(slow);
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
+                 TR, Thread::SuspendCountOffset().Int32Value());
+  cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+  b(slow->Entry(), NE);
+  Bind(slow->Continuation());
+}
+
+void SuspendCountSlowPath::Emit(Assembler* sp_asm) {
+  sp_asm->Bind(&entry_);
+  // Save return value
+  sp_asm->Store(return_save_location_, return_register_, return_size_);
+  // Pass top of stack as argument
+  sp_asm->mov(R0, ShifterOperand(SP));
+  sp_asm->LoadFromOffset(kLoadWord, R12, TR,
+                         Thread::SuspendCountEntryPointOffset().Int32Value());
+  // Note: assume that link register will be spilled/filled on method entry/exit
+  sp_asm->blx(R12);
+  // Reload return value
+  sp_asm->Load(return_register_, return_save_location_, return_size_);
+  sp_asm->b(&continuation_);
 }
 
 // Generate code to check if Thread::Current()->exception_ is non-null
 // and branch to a ExceptionSlowPath if it is.
 void Assembler::ExceptionPoll(ManagedRegister scratch) {
-  LOG(WARNING) << "Unimplemented: Exception poll";
+  ExceptionSlowPath* slow = new ExceptionSlowPath();
+  buffer_.EnqueueSlowPath(slow);
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
+                 TR, Thread::ExceptionOffset().Int32Value());
+  cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+  b(slow->Entry(), NE);
+  Bind(slow->Continuation());
+}
+
+void ExceptionSlowPath::Emit(Assembler* sp_asm) {
+  sp_asm->Bind(&entry_);
+  // Pass top of stack as argument
+  sp_asm->mov(R0, ShifterOperand(SP));
+  sp_asm->LoadFromOffset(kLoadWord, R12, TR,
+                         Thread::ExceptionEntryPointOffset().Int32Value());
+  // Note: assume that link register will be spilled/filled on method entry/exit
+  sp_asm->blx(R12);
+  // TODO: this call should never return as it should make a long jump to
+  // the appropriate catch block
+  sp_asm->b(&continuation_);
 }
 
 }  // namespace art
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index 728fadf..04671db 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -464,6 +464,7 @@
   void ValidateRef(FrameOffset src, bool could_be_null);
 
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch);
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch);
 
   // Generate code to check if Thread::Current()->suspend_count_ is non-zero
   // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index e7d048b..e377916 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -1606,6 +1606,11 @@
   // TODO: place reference map on call
 }
 
+void Assembler::Call(FrameOffset base, Offset offset,
+                     ManagedRegister) {
+  LOG(FATAL) << "Unimplemented";
+}
+
 // Generate code to check if Thread::Current()->suspend_count_ is non-zero
 // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
 // at the next instruction.
diff --git a/src/assembler_x86.h b/src/assembler_x86.h
index 5e691d1..ab010d1 100644
--- a/src/assembler_x86.h
+++ b/src/assembler_x86.h
@@ -464,6 +464,7 @@
   void ValidateRef(FrameOffset src, bool could_be_null);
 
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch);
+  void Call(FrameOffset base, Offset offset, ManagedRegister scratch);
 
   // Generate code to check if Thread::Current()->suspend_count_ is non-zero
   // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
diff --git a/src/calling_convention.cc b/src/calling_convention.cc
index 5ab5b45..bfd0e34 100644
--- a/src/calling_convention.cc
+++ b/src/calling_convention.cc
@@ -7,6 +7,11 @@
 
 namespace art {
 
+// Offset of Method within the frame
+FrameOffset CallingConvention::MethodStackOffset() {
+  return displacement_;
+}
+
 // Managed runtime calling convention
 
 size_t ManagedRuntimeCallingConvention::FrameSize() {
diff --git a/src/calling_convention.h b/src/calling_convention.h
index 6dec292..23ca6d6 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -26,6 +26,9 @@
   // Register reserved for scratch usage during procedure calls
   ManagedRegister InterproceduralScratchRegister();
 
+  // Offset of Method within the frame
+  FrameOffset MethodStackOffset();
+
   // Iterator interface
 
   // Place iterator at start of arguments. The displacement is applied to
@@ -104,6 +107,10 @@
   // call is made following the native call
   FrameOffset ReturnValueSaveLocation();
 
+  // Returns true if the register will be clobbered by an outgoing
+  // argument value.
+  bool IsOutArgRegister(ManagedRegister reg);
+
   // Iterator interface
   bool HasNext();
   void Next();
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index 7535389..f93dc9b 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -17,9 +17,9 @@
 ManagedRegister CallingConvention::ReturnRegister() {
   const Method *method = GetMethod();
   if (GetMethod()->IsReturnAFloat()) {
-    return ManagedRegister::FromSRegister(S0);
+    return ManagedRegister::FromCoreRegister(R0);
   } else if (GetMethod()->IsReturnADouble()) {
-    return ManagedRegister::FromDRegister(D0);
+    return ManagedRegister::FromRegisterPair(R0_R1);
   } else if (method->IsReturnALong()) {
     return ManagedRegister::FromRegisterPair(R0_R1);
   } else if (method->IsReturnVoid()) {
@@ -44,8 +44,18 @@
 };
 ManagedRegister ManagedRuntimeCallingConvention::CurrentParamRegister() {
   CHECK_LT(itr_position_, 3u);
-  return
-    ManagedRegister::FromCoreRegister(kManagedArgumentRegisters[itr_position_]);
+  const Method* method = GetMethod();
+  if (method->IsParamALongOrDouble(itr_position_)) {
+    // TODO: handle a long/double split between registers and the stack, also
+    // itr_position_ 0
+    if (itr_position_ != 1u) {
+      LOG(WARNING) << "Unimplemented";
+    }
+    return ManagedRegister::FromRegisterPair(R2_R3);
+  } else {
+    return
+      ManagedRegister::FromCoreRegister(kManagedArgumentRegisters[itr_position_]);
+  }
 }
 
 FrameOffset ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
@@ -56,25 +66,41 @@
 
 // JNI calling convention
 
+// Will reg be crushed by an outgoing argument?
+bool JniCallingConvention::IsOutArgRegister(ManagedRegister) {
+  // R0 holds the method register and will be crushed by the JNIEnv*
+  return true;
+}
+
 bool JniCallingConvention::IsCurrentParamInRegister() {
-  return itr_position_ < 4;
+  return (itr_position_ + itr_longs_and_doubles_) < 4;
 }
 
 bool JniCallingConvention::IsCurrentParamOnStack() {
-  return itr_position_ >= 4;
+  return (itr_position_ + itr_longs_and_doubles_) >= 4;
 }
 
 static const Register kJniArgumentRegisters[] = {
   R0, R1, R2, R3
 };
 ManagedRegister JniCallingConvention::CurrentParamRegister() {
-  CHECK_LT(itr_position_, 4u);
-  return
-    ManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_position_]);
+  CHECK_LT(itr_position_ + itr_longs_and_doubles_, 4u);
+  const Method* method = GetMethod();
+  int arg_pos = itr_position_ - (method->IsStatic() ? 2 : 1);
+  if ((itr_position_ >= 2) && method->IsParamALongOrDouble(arg_pos)) {
+    // TODO: handle a long/double split between registers and the stack
+    if (itr_position_ != 2u) {
+      LOG(WARNING) << "Unimplemented";
+    }
+    return ManagedRegister::FromRegisterPair(R2_R3);
+  } else {
+    return
+      ManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_position_]);
+  }
 }
 
 FrameOffset JniCallingConvention::CurrentParamStackOffset() {
-  CHECK_GE(itr_position_, 4u);
+  CHECK_GE(itr_position_ + itr_longs_and_doubles_, 4u);
   return FrameOffset(displacement_.Int32Value() - OutArgSize()
                + ((itr_position_ + itr_longs_and_doubles_ - 4) * kPointerSize));
 }
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index c212013..2724cba 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -50,6 +50,10 @@
 
 // JNI calling convention
 
+bool JniCallingConvention::IsOutArgRegister(ManagedRegister) {
+  return false;  // Everything is passed by stack
+}
+
 bool JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack
 }
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index f80e7ad..7eabe4e 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -74,7 +74,6 @@
       bool input_in_reg = mr_conv.IsCurrentParamInRegister();
       CHECK(input_in_reg || mr_conv.IsCurrentParamOnStack());
       if (input_in_reg) {
-        LOG(FATAL) << "UNTESTED";
         ManagedRegister in_reg  =  mr_conv.CurrentParamRegister();
         jni_asm->ValidateRef(in_reg, mr_conv.IsCurrentParamPossiblyNull());
         jni_asm->StoreRef(handle_offset, in_reg);
@@ -90,8 +89,7 @@
   }
 
   // 5. Transition from being in managed to native code
-  // TODO: write out anchor, ensure the transition to native follow a store
-  //       fence.
+  // TODO: ensure the transition to native follows a store fence.
   jni_asm->StoreStackPointerToThread(Thread::TopOfManagedStackOffset());
   jni_asm->StoreImmediateToThread(Thread::StateOffset(), Thread::kNative,
                                   mr_conv.InterproceduralScratchRegister());
@@ -103,6 +101,7 @@
 
   // 7. Acquire lock for synchronized methods.
   if (native_method->IsSynchronized()) {
+    // TODO: preserve incoming arguments in registers
     mr_conv.ResetIterator(FrameOffset(frame_size+out_arg_size));
     jni_conv.ResetIterator(FrameOffset(out_arg_size));
     jni_conv.Next();  // Skip JNIEnv*
@@ -148,6 +147,19 @@
   jni_conv.ResetIterator(FrameOffset(out_arg_size));
   jni_conv.Next();  // Skip JNIEnv*
   if (is_static) {
+    jni_conv.Next();  // Skip Class for now
+  }
+  while (mr_conv.HasNext()) {
+    CHECK(jni_conv.HasNext());
+    CopyParameter(jni_asm, &mr_conv, &jni_conv, frame_size, out_arg_size);
+    mr_conv.Next();
+    jni_conv.Next();
+  }
+  if (is_static) {
+    // Create argument for Class
+    mr_conv.ResetIterator(FrameOffset(frame_size+out_arg_size));
+    jni_conv.ResetIterator(FrameOffset(out_arg_size));
+    jni_conv.Next();  // Skip JNIEnv*
     FrameOffset handle_offset = jni_conv.CurrentParamHandleOffset();
     if (jni_conv.IsCurrentParamOnStack()) {
       FrameOffset out_off = jni_conv.CurrentParamStackOffset();
@@ -159,13 +171,6 @@
       jni_asm->CreateStackHandle(out_reg, handle_offset,
                                  ManagedRegister::NoRegister(), false);
     }
-    jni_conv.Next();
-  }
-  while (mr_conv.HasNext()) {
-    CHECK(jni_conv.HasNext());
-    CopyParameter(jni_asm, &mr_conv, &jni_conv, frame_size, out_arg_size);
-    mr_conv.Next();
-    jni_conv.Next();
   }
   // 9. Create 1st argument, the JNI environment ptr
   jni_conv.ResetIterator(FrameOffset(out_arg_size));
@@ -179,9 +184,15 @@
   }
 
   // 10. Plant call to native code associated with method
-  jni_asm->Call(mr_conv.MethodRegister(), Method::NativeMethodOffset(),
-                mr_conv.InterproceduralScratchRegister());
-
+  if (!jni_conv.IsOutArgRegister(mr_conv.MethodRegister())) {
+    // Method register shouldn't have been crushed by setting up outgoing
+    // arguments
+    jni_asm->Call(mr_conv.MethodRegister(), Method::NativeMethodOffset(),
+                  mr_conv.InterproceduralScratchRegister());
+  } else {
+    jni_asm->Call(jni_conv.MethodStackOffset(), Method::NativeMethodOffset(),
+                  mr_conv.InterproceduralScratchRegister());
+  }
   // 11. Release lock for synchronized methods.
   if (native_method->IsSynchronized()) {
     mr_conv.ResetIterator(FrameOffset(frame_size+out_arg_size));
@@ -294,7 +305,6 @@
     CHECK_LT(handle_offset.Uint32Value(), (frame_size+out_arg_size));
   }
   if (input_in_reg && output_in_reg) {
-    LOG(FATAL) << "UNTESTED";
     ManagedRegister in_reg = mr_conv->CurrentParamRegister();
     ManagedRegister out_reg = jni_conv->CurrentParamRegister();
     if (ref_param) {
@@ -317,7 +327,6 @@
                     param_size);
     }
   } else if (!input_in_reg && output_in_reg) {
-    LOG(FATAL) << "UNTESTED";
     FrameOffset in_off = mr_conv->CurrentParamStackOffset();
     ManagedRegister out_reg = jni_conv->CurrentParamRegister();
     // Check that incoming stack arguments are above the current stack frame.
@@ -331,7 +340,6 @@
       jni_asm->Load(out_reg, in_off, param_size);
     }
   } else {
-    LOG(FATAL) << "UNTESTED";
     CHECK(input_in_reg && !output_in_reg);
     ManagedRegister in_reg = mr_conv->CurrentParamRegister();
     FrameOffset out_off = jni_conv->CurrentParamStackOffset();
@@ -362,8 +370,8 @@
   // code cache.
   jni_code_size_ = 4096;
   jni_code_ = static_cast<byte*>(mmap(NULL, jni_code_size_,
-                                 PROT_READ | PROT_WRITE | PROT_EXEC,
-                                 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+                                      PROT_READ | PROT_WRITE | PROT_EXEC,
+                                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
   CHECK_NE(MAP_FAILED, jni_code_);
   jni_code_top_ = jni_code_;
 }
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index 225ca5c..9c88c84 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -31,26 +31,47 @@
     thk_asm.movl(EDI, Address(ESP, 12));  // EDI = method
     thk_asm.pushl(Immediate(0));          // push pad
     thk_asm.pushl(Immediate(0));          // push pad
-    thk_asm.pushl(Address(ESP, 40));      // push pad  or jlong high
-    thk_asm.pushl(Address(ESP, 40));      // push jint or jlong low
-    thk_asm.pushl(Address(ESP, 40));      // push jint or jlong high
-    thk_asm.pushl(Address(ESP, 40));      // push jint or jlong low
-    thk_asm.pushl(Address(ESP, 40));      // push jobject
+    thk_asm.pushl(Address(ESP, 44));      // push pad  or jlong high
+    thk_asm.pushl(Address(ESP, 44));      // push jint or jlong low
+    thk_asm.pushl(Address(ESP, 44));      // push jint or jlong high
+    thk_asm.pushl(Address(ESP, 44));      // push jint or jlong low
+    thk_asm.pushl(Address(ESP, 44));      // push jobject
     thk_asm.call(EAX);                    // Continue in method->GetCode()
     thk_asm.addl(ESP, Immediate(28));     // pop arguments
     thk_asm.popl(EDI);                    // restore EDI
     thk_asm.ret();
+#elif defined(__arm__)
+    thk_asm.AddConstant(SP, -32);         // Build frame
+    thk_asm.StoreToOffset(kStoreWord, LR, SP, 28); // Spill link register
+    thk_asm.StoreToOffset(kStoreWord, R9, SP, 24); // Spill R9
+    thk_asm.mov(R12, ShifterOperand(R0)); // R12 = method->GetCode()
+    thk_asm.mov(R0,  ShifterOperand(R1)); // R0  = method
+    thk_asm.mov(R9,  ShifterOperand(R2)); // R9  = Thread::Current()
+    thk_asm.mov(R1,  ShifterOperand(R3)); // R1  = arg1 (jint/jlong low)
+    thk_asm.LoadFromOffset(kLoadWord, R3, SP, 44); // R3 = arg5 (pad/jlong high)
+    thk_asm.StoreToOffset(kStoreWord, R3, SP, 4);
+    thk_asm.LoadFromOffset(kLoadWord, R3, SP, 40); // R3 = arg4 (jint/jlong low)
+    thk_asm.StoreToOffset(kStoreWord, R3, SP, 0);
+    thk_asm.LoadFromOffset(kLoadWord, R3, SP, 36); // R3 = arg3 (jint/jlong high)
+    thk_asm.LoadFromOffset(kLoadWord, R2, SP, 32); // R2 = arg2 (jint/jlong high)
+    thk_asm.blx(R12);                     // Branch and link R12
+    thk_asm.LoadFromOffset(kLoadWord, LR, SP, 28); // Fill link register
+    thk_asm.LoadFromOffset(kLoadWord, R9, SP, 24); // Fill R9
+    thk_asm.AddConstant(SP, 32);          // Remove frame
+    thk_asm.mov(PC, ShifterOperand(LR));  // Return
 #else
-    LOG(FATAL) << "Unimplemented";
+#error Unimplemented
 #endif
     size_t cs = thk_asm.CodeSize();
     MemoryRegion code(thunk_, cs);
     thk_asm.FinalizeInstructions(code);
     thunk_entry1_ = reinterpret_cast<jint (*)(const void*, art::Method*,
-                                              jobject, jint, jint, jint)
+                                              Thread*, jobject, jint, jint,
+                                              jint)
                                     >(code.pointer());
     thunk_entry2_ = reinterpret_cast<jdouble (*)(const void*, art::Method*,
-                                                 jobject, jdouble, jdouble)
+                                                 Thread*, jobject, jdouble,
+                                                 jdouble)
                                     >(code.pointer());
   }
 
@@ -69,7 +90,8 @@
     EXPECT_EQ(0u, Thread::Current()->NumShbHandles());
     EXPECT_EQ(Thread::kRunnable, Thread::Current()->GetState());
     // perform call
-    result.i = (*thunk_entry1_)(method->GetCode(), method, a.l, b.i, c.i, d.i);
+    result.i = (*thunk_entry1_)(method->GetCode(), method, Thread::Current(),
+                                a.l, b.i, c.i, d.i);
     // sanity check post-call
     EXPECT_EQ(0u, Thread::Current()->NumShbHandles());
     EXPECT_EQ(Thread::kRunnable, Thread::Current()->GetState());
@@ -85,7 +107,8 @@
     EXPECT_EQ(0u, Thread::Current()->NumShbHandles());
     EXPECT_EQ(Thread::kRunnable, Thread::Current()->GetState());
     // perform call
-    result.d = (*thunk_entry2_)(method->GetCode(), method, a.l, b.d, c.d);
+    result.d = (*thunk_entry2_)(method->GetCode(), method, Thread::Current(),
+                                a.l, b.d, c.d);
     // sanity check post-call
     EXPECT_EQ(0u, Thread::Current()->NumShbHandles());
     EXPECT_EQ(Thread::kRunnable, Thread::Current()->GetState());
@@ -94,8 +117,10 @@
 
   void* thunk_;
   size_t thunk_code_size_;
-  jint (*thunk_entry1_)(const void*, Method*, jobject, jint, jint, jint);
-  jdouble (*thunk_entry2_)(const void*, Method*, jobject, jdouble, jdouble);
+  jint (*thunk_entry1_)(const void*, Method*, Thread*, jobject, jint, jint,
+                        jint);
+  jdouble (*thunk_entry2_)(const void*, Method*, Thread*, jobject, jdouble,
+                           jdouble);
 };
 
 int gJava_MyClass_foo_calls = 0;
diff --git a/src/object.cc b/src/object.cc
index 6d18beb..289f526 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -125,6 +125,8 @@
   CHECK_LT(param, NumArgs());
   if (IsStatic()) {
     param++;  // 0th argument must skip return value at start of the shorty
+  } else if (param == 0) {
+    return false;  // this argument
   }
   return (shorty_[param] == 'J') || (shorty_[param] == 'D');
 }