Revert "Revert "ART: Improve JitProfile perf in x86_64 mterp""

Bug: 215853

The bug that triggered the original revert was fixed by:
https://android-review.googlesource.com/#/c/214728

This CL additionally corrects a secondary bug in the argument setup for
the MterpMaybeDoOnStackReplacement call that appeared in both the x86 and
x86_64 versions.
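
For reference, the corrected not-taken-path OSR check in the x86_64 footer
now passes the fixed Dalvik offset of 2 as the third argument (a sketch of
the new sequence; the x86 version writes the same three arguments into the
OUT_ARGn(%esp) stack slots):

    .L_check_not_taken_osr:
        movq    rSELF, OUT_ARG0                          # arg0: self
        leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1        # arg1: shadow_frame
        movl    $2, OUT_32_ARG2                          # arg2: offset; not-taken branches are always +2
        call    SYMBOL(MterpMaybeDoOnStackReplacement)   # (self, shadow_frame, offset)
        testb   %al, %al
        jnz     MterpOnStackReplacement
        ADVANCE_PC_FETCH_AND_GOTO_NEXT 2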

This reverts commit 0402c5690b1a961e923a39dab92ec1ee0b54b05a.

Change-Id: If86a5d43469d8a958e007acc0afe924330de5c16
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index f800683..e005589 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -694,7 +694,7 @@
   return MterpSetUpHotnessCountdown(method, shadow_frame);
 }
 
-// TUNING: Unused by arm/arm64/x86.  Remove when x86_64/mips/mips64 mterps support batch updates.
+// TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
 extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 685b9b6..e46f9cd 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -12989,7 +12989,7 @@
     movl    %eax, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG3(%esp)
+    movl    $2, OUT_ARG2(%esp)
     call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
     testb   %al, %al
     REFRESH_IBASE
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index a1360e0..62dce6e 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -67,7 +67,7 @@
 Some key interpreter variables will be assigned to registers.
 
   nick     reg   purpose
-  rSELF    rbp   pointer to ThreadSelf.
+  rPROFILE rbp   countdown register for jit profiling
   rPC      r12   interpreted program counter, used for fetching instructions
   rFP      r13   interpreted frame pointer, used for accessing locals and args
   rINSTw   bx    first 16-bit code of current instruction
@@ -120,6 +120,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -130,6 +145,8 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -144,7 +161,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    %rbp
+#define rSELF    SELF_SPILL(%rsp)
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -154,40 +171,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
+#define rPROFILE %ebp
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    rINST, OUT_32_ARG2
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -211,7 +199,8 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
 .endm
 
 /*
@@ -377,6 +366,12 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
@@ -579,9 +574,10 @@
 .L_op_move_exception: /* 0x0d */
 /* File: x86_64/op_move_exception.S */
     /* move-exception vAA */
-    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 /* ------------------------------ */
@@ -590,9 +586,9 @@
 /* File: x86_64/op_return_void.S */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
@@ -610,9 +606,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -628,9 +624,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
@@ -649,9 +645,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -854,7 +850,8 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
@@ -988,7 +985,8 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
     jmp     MterpException
 
 /* ------------------------------ */
@@ -1003,12 +1001,8 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1022,12 +1016,8 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1044,12 +1034,8 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1069,13 +1055,9 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoPackedSwitch)
+    testl   %eax, %eax
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1096,13 +1078,9 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoSparseSwitch)
+    testl   %eax, %eax
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
 
 
 /* ------------------------------ */
@@ -1309,16 +1287,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1339,16 +1315,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     je   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1369,16 +1343,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1399,16 +1371,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1429,16 +1399,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1459,16 +1427,14 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $2, rINST                      # assume not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1485,16 +1451,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1511,16 +1475,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     je   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1537,16 +1499,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1563,16 +1523,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1589,16 +1547,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1615,16 +1571,14 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $2, rINST                      # assume branch not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
 
 /* ------------------------------ */
@@ -1767,7 +1721,8 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2099,7 +2054,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet32InstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2131,7 +2087,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet64InstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2164,7 +2121,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetObjInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 1
@@ -2197,7 +2155,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetBooleanInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2230,7 +2189,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetByteInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2263,7 +2223,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetCharInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2296,7 +2257,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetShortInstanceFromCode)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2489,7 +2451,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet32StaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2519,7 +2482,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet64StaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2550,7 +2514,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetObjStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 1
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2581,7 +2546,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetBooleanStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2612,7 +2578,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetByteStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2643,7 +2610,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetCharStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2674,7 +2642,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetShortStaticFromCode)
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -3002,9 +2971,9 @@
     .balign 128
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86_64/op_return_void_no_barrier.S */
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
@@ -5712,7 +5681,8 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -11849,7 +11819,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -11860,7 +11830,8 @@
  * interpreter.
  */
 MterpPossibleException:
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -11891,19 +11862,113 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)
-1:
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
     GOTO_NEXT
 
+.L_no_count_backwards:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -11943,7 +12008,28 @@
     movq    %rax, (%rdx)
     movl    $1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addq    $FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index df10ff0..fa03e78 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -238,7 +238,7 @@
     movl    %eax, OUT_ARG0(%esp)
     leal    OFF_FP_SHADOWFRAME(rFP), %ecx
     movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG3(%esp)
+    movl    $$2, OUT_ARG2(%esp)
     call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
     testb   %al, %al
     REFRESH_IBASE
diff --git a/runtime/interpreter/mterp/x86_64/bincmp.S b/runtime/interpreter/mterp/x86_64/bincmp.S
index a16050b..6601483 100644
--- a/runtime/interpreter/mterp/x86_64/bincmp.S
+++ b/runtime/interpreter/mterp/x86_64/bincmp.S
@@ -11,13 +11,11 @@
     andb    $$0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
-    movl    $$2, rINST                      # assume not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rax <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/entry.S b/runtime/interpreter/mterp/x86_64/entry.S
index 69b2371..d992956 100644
--- a/runtime/interpreter/mterp/x86_64/entry.S
+++ b/runtime/interpreter/mterp/x86_64/entry.S
@@ -65,6 +65,12 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    call    SYMBOL(MterpSetUpHotnessCountdown)
+    movswl  %ax, rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
index 573256b..54d0cb1 100644
--- a/runtime/interpreter/mterp/x86_64/footer.S
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -71,7 +71,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -82,7 +82,8 @@
  * interpreter.
  */
 MterpPossibleException:
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -113,19 +114,113 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranch:
+    jg      .L_forward_branch               # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_osr_check
+    decl    rPROFILE
+    je      .L_add_batch                    # counted down to zero - report
+.L_resume_backward_branch:
+    movq    rSELF, %rax
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    jnz     .L_suspend_request_pending
+    GOTO_NEXT
+
+.L_suspend_request_pending:
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)
-1:
+    call    SYMBOL(MterpSuspendCheck)       # (self)
+    testb   %al, %al
+    jnz     MterpFallback
+    REFRESH_IBASE                           # might have changed during suspend
     GOTO_NEXT
 
+.L_no_count_backwards:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    jne     .L_resume_backward_branch
+.L_osr_check:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_backward_branch
+    jmp     MterpOnStackReplacement
+
+.L_forward_branch:
+    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
+    je      .L_check_osr_forward
+.L_resume_forward_branch:
+    leaq    (rPC, rINSTq, 2), rPC
+    FETCH_INST
+    GOTO_NEXT
+
+.L_check_osr_forward:
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movq    rINSTq, OUT_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jz      .L_resume_forward_branch
+    jmp     MterpOnStackReplacement
+
+.L_add_batch:
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movswl  %ax, rPROFILE
+    jmp     .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    $$2, OUT_32_ARG2
+    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -165,7 +260,28 @@
     movq    %rax, (%rdx)
     movl    $$1, %eax
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    testl   rPROFILE, rPROFILE
+    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
+
+    movl    %eax, rINST                     # stash return value
+    /* Report cached hotness counts */
+    movl    rPROFILE, %eax
+    movq    OFF_FP_METHOD(rFP), OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
+    movq    rSELF, OUT_ARG2
+    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
+    movl    rINST, %eax                     # restore return value
+
     /* pop up frame */
+MRestoreFrame:
     addq    $$FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86_64/header.S b/runtime/interpreter/mterp/x86_64/header.S
index eb84ea1..7699fc4 100644
--- a/runtime/interpreter/mterp/x86_64/header.S
+++ b/runtime/interpreter/mterp/x86_64/header.S
@@ -60,7 +60,7 @@
 Some key interpreter variables will be assigned to registers.
 
   nick     reg   purpose
-  rSELF    rbp   pointer to ThreadSelf.
+  rPROFILE rbp   countdown register for jit profiling
   rPC      r12   interpreted program counter, used for fetching instructions
   rFP      r13   interpreted frame pointer, used for accessing locals and args
   rINSTw   bx    first 16-bit code of current instruction
@@ -113,6 +113,21 @@
     .cfi_restore \_reg
 .endm
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -123,6 +138,8 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
+/* Spill offsets relative to %rsp */
+#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -137,7 +154,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    %rbp
+#define rSELF    SELF_SPILL(%rsp)
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -147,40 +164,11 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
+#define rPROFILE %ebp
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
-#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
- * Profile branch. rINST should contain the offset. %eax is scratch.
- */
-.macro MTERP_PROFILE_BRANCH
-#ifdef MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    rINST, OUT_32_ARG2
-    call    SYMBOL(MterpProfileBranch)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-#endif
-.endm
-
-/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -204,7 +192,8 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
+    movq    rSELF, rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
 .endm
 
 /*
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_object.S b/runtime/interpreter/mterp/x86_64/op_aget_object.S
index 8baedea..5f77a97 100644
--- a/runtime/interpreter/mterp/x86_64/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_aget_object.S
@@ -10,7 +10,8 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_goto.S b/runtime/interpreter/mterp/x86_64/op_goto.S
index c4fc976..9749901 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto.S
@@ -6,9 +6,5 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_16.S b/runtime/interpreter/mterp/x86_64/op_goto_16.S
index 8cb9a5c..77688e0 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_16.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_16.S
@@ -6,9 +6,5 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_32.S b/runtime/interpreter/mterp/x86_64/op_goto_32.S
index 4ecdacd..29d777b 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_32.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_32.S
@@ -9,9 +9,5 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_iget.S b/runtime/interpreter/mterp/x86_64/op_iget.S
index a0d0faf..df43efe 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget.S
@@ -12,7 +12,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL($helper)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
index 964d20a..176c954 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
@@ -7,7 +7,8 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_instance_of.S b/runtime/interpreter/mterp/x86_64/op_instance_of.S
index 6be37f9..4819833 100644
--- a/runtime/interpreter/mterp/x86_64/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86_64/op_instance_of.S
@@ -14,7 +14,8 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
diff --git a/runtime/interpreter/mterp/x86_64/op_move_exception.S b/runtime/interpreter/mterp/x86_64/op_move_exception.S
index d0a14fd..33db878 100644
--- a/runtime/interpreter/mterp/x86_64/op_move_exception.S
+++ b/runtime/interpreter/mterp/x86_64/op_move_exception.S
@@ -1,5 +1,6 @@
     /* move-exception vAA */
-    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
+    movq    rSELF, %rcx
+    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index cb0acb7..fdf5a50 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -13,10 +13,6 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL($func)
+    testl   %eax, %eax
     movslq  %eax, rINSTq
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue
-    GOTO_NEXT
+    jmp     MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
index 14f4f8a..07e0e53 100644
--- a/runtime/interpreter/mterp/x86_64/op_return.S
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -6,9 +6,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
index 46a5753..6a12df3 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -1,8 +1,8 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
index 92e3506..822b2e8 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
index f2d6e04..288eb96 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -4,9 +4,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
-    jz      1f
     movq    rSELF, OUT_ARG0
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
index 38d9a5e..d39e6c4 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -11,7 +11,8 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL($helper)
-    cmpl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
     .if $is_object
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_throw.S b/runtime/interpreter/mterp/x86_64/op_throw.S
index 22ed990..8095c25 100644
--- a/runtime/interpreter/mterp/x86_64/op_throw.S
+++ b/runtime/interpreter/mterp/x86_64/op_throw.S
@@ -6,5 +6,6 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
+    movq    rSELF, %rcx
+    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
     jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86_64/zcmp.S b/runtime/interpreter/mterp/x86_64/zcmp.S
index 0051407..fb8ae6a 100644
--- a/runtime/interpreter/mterp/x86_64/zcmp.S
+++ b/runtime/interpreter/mterp/x86_64/zcmp.S
@@ -7,13 +7,11 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $$0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
-    movl    $$2, rINST                      # assume branch not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
+    testq   rINSTq, rINSTq
+    jmp     MterpCommonTakenBranch
 1:
-    MTERP_PROFILE_BRANCH
-    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
-    leaq    (rPC, rINSTq), rPC
-    FETCH_INST
-    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
-    GOTO_NEXT
+    cmpl    $$JIT_CHECK_OSR, rPROFILE
+    je      .L_check_not_taken_osr
+    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2