Fix JNI compiler for synchronized methods.

Calls to the monitor enter/exit routines were computing the JNI env
argument while the argument iterator was in the wrong position. Reset
the iterator so it is in the correct position before generating the
monitor enter/exit call.

Also fix clobbering of arguments held in registers (R1-R3) when calling
monitor enter for synchronized methods on ARM, by spilling them before
the call and refilling them afterwards.

Also some tidying of code/comments.

Change-Id: I5bf1dd7e65d925e768411cb5865919ee5f54edbf
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index 9b8cc3a..c13db39 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -1413,34 +1413,50 @@
   mov(rd, ShifterOperand(rm, ROR, 0), cond);
 }
 
-// Emit code that will create an activation on the stack
-void Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg) {
-  CHECK(IsAligned(frame_size, 16));
-  // TODO: use stm/ldm
+void Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                           const std::vector<ManagedRegister>& spill_regs) {
+  CHECK(IsAligned(frame_size, kStackAlignment));
+  CHECK_EQ(R0, method_reg.AsCoreRegister());
   AddConstant(SP, -frame_size);
-  StoreToOffset(kStoreWord, LR, SP, frame_size - 4);
-  StoreToOffset(kStoreWord, method_reg.AsCoreRegister(), SP, 0);
+  RegList spill_list = 1 << R0 | 1 << LR;
+  for(size_t i = 0; i < spill_regs.size(); i++) {
+    Register reg = spill_regs.at(i).AsCoreRegister();
+    // check assumption LR is the last register that gets spilled
+    CHECK_LT(reg, LR);
+    spill_list |= 1 << reg;
+  }
+  // Store spill list from (low to high number register) starting at SP
+  // incrementing after each store but not updating SP
+  stm(IA, SP, spill_list, AL);
 }
 
-// Emit code that will remove an activation from the stack
-void Assembler::RemoveFrame(size_t frame_size) {
-  CHECK(IsAligned(frame_size, 16));
-  LoadFromOffset(kLoadWord, LR, SP, frame_size - 4);
+void Assembler::RemoveFrame(size_t frame_size,
+                            const std::vector<ManagedRegister>& spill_regs) {
+  CHECK(IsAligned(frame_size, kStackAlignment));
+  // Reload LR. TODO: reload any saved callee saves from spill_regs
+  LoadFromOffset(kLoadWord, LR, SP, (spill_regs.size() + 1) * kPointerSize);
   AddConstant(SP, frame_size);
   mov(PC, ShifterOperand(LR));
 }
 
+void Assembler::FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
+                                  size_t displacement) {
+  for(size_t i = 0; i < spill_regs.size(); i++) {
+    Register reg = spill_regs.at(i).AsCoreRegister();
+    LoadFromOffset(kLoadWord, reg, SP, displacement + ((i + 1) * kPointerSize));
+  }
+}
+
 void Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, 16));
+  CHECK(IsAligned(adjust, kStackAlignment));
   AddConstant(SP, -adjust);
 }
 
 void Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, 16));
+  CHECK(IsAligned(adjust, kStackAlignment));
   AddConstant(SP, adjust);
 }
 
-// Store bytes from the given register onto the stack
 void Assembler::Store(FrameOffset dest, ManagedRegister src, size_t size) {
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
@@ -1671,9 +1687,6 @@
   // TODO: place reference map on call
 }
 
-// Generate code to check if Thread::Current()->suspend_count_ is non-zero
-// and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
-// at the next instruction.
 void Assembler::SuspendPoll(ManagedRegister scratch, ManagedRegister return_reg,
                             FrameOffset return_save_location,
                             size_t return_size) {
@@ -1703,8 +1716,6 @@
   sp_asm->b(&continuation_);
 }
 
-// Generate code to check if Thread::Current()->exception_ is non-null
-// and branch to a ExceptionSlowPath if it is.
 void Assembler::ExceptionPoll(ManagedRegister scratch) {
   ExceptionSlowPath* slow = new ExceptionSlowPath();
   buffer_.EnqueueSlowPath(slow);
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index f82107c..99ff9fe 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -8,6 +8,7 @@
 #include "logging.h"
 #include "offsets.h"
 #include "utils.h"
+#include <vector>
 
 namespace art {
 
@@ -409,10 +410,16 @@
   static bool IsInstructionForExceptionHandling(uword pc);
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg);
+  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                  const std::vector<ManagedRegister>& spill_regs);
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size);
+  void RemoveFrame(size_t frame_size,
+                   const std::vector<ManagedRegister>& spill_regs);
+
+  // Fill registers from spill area, excluding R0 (Method*) and LR
+  void FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
+                         size_t displacement);
 
   void IncreaseFrameSize(size_t adjust);
   void DecreaseFrameSize(size_t adjust);
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index 80896c5..b6183a9 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -1173,9 +1173,10 @@
 }
 
 
-void Assembler::lock() {
+Assembler* Assembler::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF0);
+  return this;
 }
 
 
@@ -1186,10 +1187,11 @@
   EmitOperand(reg, address);
 }
 
-void Assembler::fs() {
+Assembler* Assembler::fs() {
   // TODO: fs is a prefix and not an instruction
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x64);
+  return this;
 }
 
 void Assembler::AddImmediate(Register reg, const Immediate& imm) {
@@ -1370,32 +1372,39 @@
   EmitOperand(rm, Operand(operand));
 }
 
-// Emit code that will create an activation on the stack
-void Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg) {
-  CHECK(IsAligned(frame_size, 16));
+void Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                           const std::vector<ManagedRegister>& spill_regs) {
+  CHECK(IsAligned(frame_size, kStackAlignment));
+  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
   // return address then method on stack
-  addl(ESP, Immediate(-frame_size + 4 /*method*/ + 4 /*return address*/));
+  addl(ESP, Immediate(-frame_size + kPointerSize /*method*/ +
+                      kPointerSize /*return address*/));
   pushl(method_reg.AsCpuRegister());
 }
 
-// Emit code that will remove an activation from the stack
-void Assembler::RemoveFrame(size_t frame_size) {
-  CHECK(IsAligned(frame_size, 16));
-  addl(ESP, Immediate(frame_size - 4));
+void Assembler::RemoveFrame(size_t frame_size,
+                            const std::vector<ManagedRegister>& spill_regs) {
+  CHECK(IsAligned(frame_size, kStackAlignment));
+  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
+  addl(ESP, Immediate(frame_size - kPointerSize));
   ret();
 }
 
+void Assembler::FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
+                                  size_t displacement) {
+  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
+}
+
 void Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, 16));
+  CHECK(IsAligned(adjust, kStackAlignment));
   addl(ESP, Immediate(-adjust));
 }
 
 void Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, 16));
+  CHECK(IsAligned(adjust, kStackAlignment));
   addl(ESP, Immediate(adjust));
 }
 
-// Store bytes from the given register onto the stack
 void Assembler::Store(FrameOffset offs, ManagedRegister src, size_t size) {
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
@@ -1442,8 +1451,7 @@
 
 void Assembler::StoreImmediateToThread(ThreadOffset dest, uint32_t imm,
                                        ManagedRegister) {
-  fs();
-  movl(Address::Absolute(dest), Immediate(imm));
+  fs()->movl(Address::Absolute(dest), Immediate(imm));
 }
 
 void Assembler::Load(ManagedRegister dest, FrameOffset src, size_t size) {
@@ -1481,15 +1489,13 @@
 
 void Assembler::LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset offs) {
   CHECK(dest.IsCpuRegister());
-  fs();
-  movl(dest.AsCpuRegister(), Address::Absolute(offs));
+  fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
 }
 
 void Assembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
                           ManagedRegister scratch) {
   CHECK(scratch.IsCpuRegister());
-  fs();
-  movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
+  fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
   Store(fr_offs, scratch, 4);
 }
 
@@ -1497,8 +1503,7 @@
                                    ManagedRegister scratch) {
   CHECK(scratch.IsCpuRegister());
   Load(scratch, fr_offs, 4);
-  fs();
-  movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
 void Assembler::StoreStackOffsetToThread(ThreadOffset thr_offs,
@@ -1506,13 +1511,11 @@
                                          ManagedRegister scratch) {
   CHECK(scratch.IsCpuRegister());
   leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  fs();
-  movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+  fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
 void Assembler::StoreStackPointerToThread(ThreadOffset thr_offs) {
-  fs();
-  movl(Address::Absolute(thr_offs), ESP);
+  fs()->movl(Address::Absolute(thr_offs), ESP);
 }
 
 void Assembler::Move(ManagedRegister dest, ManagedRegister src) {
@@ -1611,57 +1614,54 @@
   UNIMPLEMENTED(FATAL);
 }
 
-// Generate code to check if Thread::Current()->suspend_count_ is non-zero
-// and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
-// at the next instruction.
 void Assembler::SuspendPoll(ManagedRegister scratch, ManagedRegister return_reg,
                             FrameOffset return_save_location,
                             size_t return_size) {
   SuspendCountSlowPath* slow =
       new SuspendCountSlowPath(return_reg, return_save_location, return_size);
   buffer_.EnqueueSlowPath(slow);
-  fs();
-  cmpl(Address::Absolute(Thread::SuspendCountOffset()), Immediate(0));
+  fs()->cmpl(Address::Absolute(Thread::SuspendCountOffset()), Immediate(0));
   j(NOT_EQUAL, slow->Entry());
   Bind(slow->Continuation());
 }
+
 void SuspendCountSlowPath::Emit(Assembler *sp_asm) {
-  sp_asm->Bind(&entry_);
+#define __ sp_asm->
+  __ Bind(&entry_);
   // Save return value
-  sp_asm->Store(return_save_location_, return_register_, return_size_);
+  __ Store(return_save_location_, return_register_, return_size_);
   // Pass top of stack as argument
-  sp_asm->pushl(ESP);
-  sp_asm->fs();
-  sp_asm->call(Address::Absolute(Thread::SuspendCountEntryPointOffset()));
+  __ pushl(ESP);
+  __ fs()->call(Address::Absolute(Thread::SuspendCountEntryPointOffset()));
   // Release argument
-  sp_asm->addl(ESP, Immediate(kPointerSize));
+  __ addl(ESP, Immediate(kPointerSize));
   // Reload return value
-  sp_asm->Load(return_register_, return_save_location_, return_size_);
-  sp_asm->jmp(&continuation_);
+  __ Load(return_register_, return_save_location_, return_size_);
+  __ jmp(&continuation_);
+#undef __
 }
 
-// Generate code to check if Thread::Current()->exception_ is non-null
-// and branch to a ExceptionSlowPath if it is.
 void Assembler::ExceptionPoll(ManagedRegister scratch) {
   ExceptionSlowPath* slow = new ExceptionSlowPath();
   buffer_.EnqueueSlowPath(slow);
-  fs();
-  cmpl(Address::Absolute(Thread::ExceptionOffset()), Immediate(0));
+  fs()->cmpl(Address::Absolute(Thread::ExceptionOffset()), Immediate(0));
   j(NOT_EQUAL, slow->Entry());
   Bind(slow->Continuation());
 }
+
 void ExceptionSlowPath::Emit(Assembler *sp_asm) {
-  sp_asm->Bind(&entry_);
+#define __ sp_asm->
+  __ Bind(&entry_);
   // NB the return value is dead
   // Pass top of stack as argument
-  sp_asm->pushl(ESP);
-  sp_asm->fs();
-  sp_asm->call(Address::Absolute(Thread::ExceptionEntryPointOffset()));
+  __ pushl(ESP);
+  __ fs()->call(Address::Absolute(Thread::ExceptionEntryPointOffset()));
   // TODO: this call should never return as it should make a long jump to
   // the appropriate catch block
   // Release argument
-  sp_asm->addl(ESP, Immediate(kPointerSize));
-  sp_asm->jmp(&continuation_);
+  __ addl(ESP, Immediate(kPointerSize));
+  __ jmp(&continuation_);
+#undef __
 }
 
 }  // namespace art
diff --git a/src/assembler_x86.h b/src/assembler_x86.h
index 400a304..2c7f2d2 100644
--- a/src/assembler_x86.h
+++ b/src/assembler_x86.h
@@ -3,6 +3,7 @@
 #ifndef ART_SRC_ASSEMBLER_X86_H_
 #define ART_SRC_ASSEMBLER_X86_H_
 
+#include <vector>
 #include "assembler.h"
 #include "constants.h"
 #include "globals.h"
@@ -398,20 +399,26 @@
   void jmp(Register reg);
   void jmp(Label* label);
 
-  void lock();
+  Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
 
-  void fs();
+  Assembler* fs();
 
   //
   // Macros for High-level operations.
   //
 
   // Emit code that will create an activation on the stack
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg);
+  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                  const std::vector<ManagedRegister>& spill_regs);
 
   // Emit code that will remove an activation from the stack
-  void RemoveFrame(size_t frame_size);
+  void RemoveFrame(size_t frame_size,
+                   const std::vector<ManagedRegister>& spill_regs);
+
+  // Fill registers from spill area - no-op on x86
+  void FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
+                         size_t displacement);
 
   void IncreaseFrameSize(size_t adjust);
   void DecreaseFrameSize(size_t adjust);
@@ -486,8 +493,7 @@
   void DoubleAbs(XmmRegister reg);
 
   void LockCmpxchgl(const Address& address, Register reg) {
-    lock();
-    cmpxchgl(address, reg);
+    lock()->cmpxchgl(address, reg);
   }
 
   //
diff --git a/src/calling_convention.cc b/src/calling_convention.cc
index ab6bba9..d14857b 100644
--- a/src/calling_convention.cc
+++ b/src/calling_convention.cc
@@ -51,14 +51,6 @@
 
 // JNI calling convention
 
-size_t JniCallingConvention::FrameSize() {
-  // Return address and Method*
-  size_t frame_data_size = 2 * kPointerSize;
-  // Handles plus 2 words for SHB header
-  size_t handle_area_size = (HandleCount() + 2) * kPointerSize;
-  return RoundUp(frame_data_size + handle_area_size + SizeOfReturnValue(), 16);
-}
-
 size_t JniCallingConvention::OutArgSize() {
   return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize, 16);
 }
diff --git a/src/calling_convention.h b/src/calling_convention.h
index adcafca..7bfc28e 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -3,6 +3,7 @@
 #ifndef ART_SRC_CALLING_CONVENTION_H_
 #define ART_SRC_CALLING_CONVENTION_H_
 
+#include <vector>
 #include "managed_register.h"
 #include "object.h"
 #include "thread.h"
@@ -84,12 +85,12 @@
 
 // Abstraction for JNI calling conventions
 // | incoming stack args    | <-- Prior SP
-// | { Spilled registers    |
-// |   & return address }   |
+// | { Return address }     |     (x86)
 // | { Return value spill } |     (live on return slow paths)
 // | { Stack Handle Block   |
 // |   ...                  |
 // |   num. refs./link }    |     (here to prior SP is frame size)
+// | { Spill area }         |     (ARM)
 // | Method*                | <-- Anchor SP written to thread
 // | { Outgoing stack args  |
 // |   ... }                | <-- SP at point of call
@@ -97,7 +98,8 @@
 class JniCallingConvention : public CallingConvention {
  public:
   explicit JniCallingConvention(Method* native_method) :
-                      CallingConvention(native_method) {}
+                      CallingConvention(native_method),
+                      spill_regs_(ComputeRegsToSpillPreCall()) {}
 
   // Size of frame excluding space for outgoing args (its assumed Method* is
   // always at the bottom of a frame, but this doesn't work for outgoing
@@ -107,10 +109,18 @@
   size_t OutArgSize();
   // Number of handles in stack handle block
   size_t HandleCount();
+  // Size of area used to hold spilled registers
+  size_t SpillAreaSize();
   // Location where the return value of a call can be squirreled if another
   // call is made following the native call
   FrameOffset ReturnValueSaveLocation();
 
+  // Registers that must be spilled (due to clobbering) before the call into
+  // the native routine
+  const std::vector<ManagedRegister>& RegsToSpillPreCall() {
+    return *spill_regs_.get();
+  }
+
   // Returns true if the register will be clobbered by an outgoing
   // argument value.
   bool IsOutArgRegister(ManagedRegister reg);
@@ -131,6 +141,7 @@
   // Position of stack handle block and interior fields
   FrameOffset ShbOffset() {
     return FrameOffset(displacement_.Int32Value() +
+                       SpillAreaSize() +
                        kPointerSize);  // above Method*
   }
   FrameOffset ShbNumRefsOffset() {
@@ -153,6 +164,12 @@
   // located
   size_t NumberOfOutgoingStackArgs();
 
+  // Compute registers for RegsToSpillPreCall
+  std::vector<ManagedRegister>* ComputeRegsToSpillPreCall();
+
+  // Extra registers to spill before the call into native
+  const scoped_ptr<std::vector<ManagedRegister> > spill_regs_;
+
   static size_t NumberOfExtraArgumentsForJni(const Method* method);
   DISALLOW_COPY_AND_ASSIGN(JniCallingConvention);
 };
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index bdf2e45..0300ad1 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -73,6 +73,34 @@
 
 // JNI calling convention
 
+size_t JniCallingConvention::FrameSize() {
+  // Method* and spill area size
+  size_t frame_data_size = kPointerSize + SpillAreaSize();
+  // Handles plus 2 words for SHB header
+  size_t handle_area_size = (HandleCount() + 2) * kPointerSize;
+  // Plus return value spill area size
+  return RoundUp(frame_data_size + handle_area_size + SizeOfReturnValue(), 16);
+}
+
+size_t JniCallingConvention::SpillAreaSize() {
+  // Space for link register. For synchronized methods we need enough space to
+  // save R1, R2 and R3 (R0 is the method register and always preserved)
+  return GetMethod()->IsSynchronized() ? (4 * kPointerSize) : kPointerSize;
+}
+
+std::vector<ManagedRegister>* JniCallingConvention::ComputeRegsToSpillPreCall()
+{
+  // A synchronized method will call monitor enter clobbering R1, R2 and R3
+  // unless they are spilled.
+  std::vector<ManagedRegister>* result = new std::vector<ManagedRegister>();
+  if (GetMethod()->IsSynchronized()) {
+    result->push_back(ManagedRegister::FromCoreRegister(R1));
+    result->push_back(ManagedRegister::FromCoreRegister(R2));
+    result->push_back(ManagedRegister::FromCoreRegister(R3));
+  }
+  return result;
+}
+
 // Will reg be crushed by an outgoing argument?
 bool JniCallingConvention::IsOutArgRegister(ManagedRegister) {
   // R0 holds the method register and will be crushed by the JNIEnv*
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index 2d9b07f..9420849 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -48,6 +48,27 @@
 
 // JNI calling convention
 
+size_t JniCallingConvention::FrameSize() {
+  // Return address and Method*
+  size_t frame_data_size = 2 * kPointerSize;
+  // Handles plus 2 words for SHB header
+  size_t handle_area_size = (HandleCount() + 2) * kPointerSize;
+  // Plus return value spill area size
+  return RoundUp(frame_data_size + handle_area_size + SizeOfReturnValue(), 16);
+}
+
+size_t JniCallingConvention::SpillAreaSize() {
+  // No spills, return address was pushed at the top of the frame
+  return 0;
+}
+
+std::vector<ManagedRegister>* JniCallingConvention::ComputeRegsToSpillPreCall()
+{
+  // No live values in registers (everything is on the stack) so never anything
+  // to preserve.
+  return  new std::vector<ManagedRegister>();
+}
+
 bool JniCallingConvention::IsOutArgRegister(ManagedRegister) {
   return false;  // Everything is passed by stack
 }
diff --git a/src/globals.h b/src/globals.h
index b7096e6..777cfd6 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -32,6 +32,8 @@
 const int kBitsPerWord = kWordSize * kBitsPerByte;
 const int kBitsPerInt = kIntSize * kBitsPerByte;
 
+// Required stack alignment
+const int kStackAlignment = 16;
 
 // System page size.  Normally you're expected to get this from
 // sysconf(_SC_PAGESIZE) or some system-specific define (usually
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index 07aeec3..dd7d5e5 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -28,7 +28,8 @@
 
   // 1. Build the frame
   const size_t frame_size(jni_conv.FrameSize());
-  jni_asm->BuildFrame(frame_size, mr_conv.MethodRegister());
+  const std::vector<ManagedRegister>& spill_regs = jni_conv.RegsToSpillPreCall();
+  jni_asm->BuildFrame(frame_size, mr_conv.MethodRegister(), spill_regs);
 
   // 2. Save callee save registers that aren't callee save in the native code
   // TODO: implement computing the difference of the callee saves
@@ -124,6 +125,7 @@
       CopyParameter(jni_asm, &mr_conv, &jni_conv, frame_size, out_arg_size);
     }
     // Generate JNIEnv* in place and leave a copy in jni_env_register
+    jni_conv.ResetIterator(FrameOffset(out_arg_size));
     ManagedRegister jni_env_register =
         jni_conv.InterproceduralScratchRegister();
     if (jni_conv.IsCurrentParamInRegister()) {
@@ -138,6 +140,7 @@
     static Offset monitor_enter(OFFSETOF_MEMBER(JNIEnvExt, MonitorEnterHelper));
     jni_asm->Call(jni_env_register, monitor_enter,
                   jni_conv.InterproceduralScratchRegister());
+    jni_asm->FillFromSpillArea(spill_regs, out_arg_size);
     jni_asm->ExceptionPoll(jni_conv.InterproceduralScratchRegister());
   }
 
@@ -224,6 +227,7 @@
       CopyParameter(jni_asm, &mr_conv, &jni_conv, frame_size, out_arg_size);
     }
     // Generate JNIEnv* in place and leave a copy in jni_env_register
+    jni_conv.ResetIterator(FrameOffset(out_arg_size));
     ManagedRegister jni_env_register =
         jni_conv.InterproceduralScratchRegister();
     if (jni_conv.IsCurrentParamInRegister()) {
@@ -238,7 +242,6 @@
     static Offset monitor_exit(OFFSETOF_MEMBER(JNIEnvExt, MonitorExitHelper));
     jni_asm->Call(jni_env_register, monitor_exit,
                   jni_conv.InterproceduralScratchRegister());
-    jni_asm->ExceptionPoll(jni_conv.InterproceduralScratchRegister());
     // Reload return value
     jni_asm->Load(jni_conv.ReturnRegister(), return_save_location,
                   jni_conv.SizeOfReturnValue());
@@ -277,7 +280,7 @@
                               jni_conv.InterproceduralScratchRegister());
 
   // 17. Remove activation
-  jni_asm->RemoveFrame(frame_size);
+  jni_asm->RemoveFrame(frame_size, spill_regs);
 
   // 18. Finalize code generation
   jni_asm->EmitSlowPaths();
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index f717014..59b9004 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -512,7 +512,7 @@
   gSuspendCounterHandler_calls++;
   Thread::Current()->DecrementSuspendCount();
 }
-TEST_F(JniCompilerTest, SuspendCountAcknolewdgement) {
+TEST_F(JniCompilerTest, SuspendCountAcknowledgement) {
   scoped_ptr<DexFile> dex(OpenDexFileBase64(kMyClassNativesDex));
   PathClassLoader* class_loader = AllocPathClassLoader(dex.get());
 
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 71d1204..9a0563b 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -1890,7 +1890,6 @@
 };
 
 void MonitorEnterHelper(JNIEnv* env, jobject obj) {
-  // env = Thread::Current()->GetJniEnv();  // uncomment this if you want your tests to pass
   CHECK_EQ(Thread::Current()->GetJniEnv(), env);
   MonitorEnter(env, obj);  // Ignore the result.
 }