ART: Reduce overhead of mterp OSR checking

Once mterp's branch profiling identifies a method hot enough
to try on-stack replacement, a request is made to the JIT to compile
the method for OSR.  Mterp then enters a mode in which it checks
for completion of the compilation in order to initiate the OSR.
Currently, this completion check happens on every branch. In
situations in which the JIT is backlogged and it takes a while for the
compilation to complete, the overhead of doing these checks is noticeable.

This change moves from a "check on every branch" model to a "check
on every Nth branch" model.  We start with N=100, which should still
yield responsive OSR but dramatically reduce the checking overhead.

Bug: 32090348
Test: m test-art-host
Test: m test-art-target (Nexus 5x)
Change-Id: I97442723397bb242163dc18bd4444977bcd469fa
diff --git a/runtime/interpreter/mterp/mips64/bincmp.S b/runtime/interpreter/mterp/mips64/bincmp.S
index 07b1210..c2bca91 100644
--- a/runtime/interpreter/mterp/mips64/bincmp.S
+++ b/runtime/interpreter/mterp/mips64/bincmp.S
@@ -6,7 +6,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
diff --git a/runtime/interpreter/mterp/mips64/op_packed_switch.S b/runtime/interpreter/mterp/mips64/op_packed_switch.S
index 27ce580..44e77a4 100644
--- a/runtime/interpreter/mterp/mips64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips64/op_packed_switch.S
@@ -10,7 +10,6 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern $func
-    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 369c261..75ab91a 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -768,38 +768,32 @@
   return MterpSetUpHotnessCountdown(method, shadow_frame);
 }
 
-// TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
-extern "C" size_t MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  JValue* result = shadow_frame->GetResultRegister();
-  uint32_t dex_pc = shadow_frame->GetDexPC();
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if ((jit != nullptr) && (offset <= 0)) {
-    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
-  }
-  int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
-  if (countdown_value == jit::kJitCheckForOSR) {
-    return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
-  } else {
-    return false;
-  }
-}
-
 extern "C" size_t MterpMaybeDoOnStackReplacement(Thread* self,
                                                  ShadowFrame* shadow_frame,
                                                  int32_t offset)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  JValue* result = shadow_frame->GetResultRegister();
-  uint32_t dex_pc = shadow_frame->GetDexPC();
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (offset <= 0) {
-    // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
-    jit->AddSamples(self, method, 1, /*with_backedges*/ true);
+  int16_t osr_countdown = shadow_frame->GetCachedHotnessCountdown() - 1;
+  bool did_osr = false;
+  /*
+   * To reduce the cost of polling the compiler to determine whether the requested OSR
+   * compilation has completed, only check every Nth time.  NOTE: the "osr_countdown <= 0"
+   * condition is satisfied either by the decrement above or the initial setting of
+   * the cached countdown field to kJitCheckForOSR, which elsewhere is asserted to be -1.
+   */
+  if (osr_countdown <= 0) {
+    ArtMethod* method = shadow_frame->GetMethod();
+    JValue* result = shadow_frame->GetResultRegister();
+    uint32_t dex_pc = shadow_frame->GetDexPC();
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    osr_countdown = jit::Jit::kJitRecheckOSRThreshold;
+    if (offset <= 0) {
+      // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
+      jit->AddSamples(self, method, osr_countdown, /*with_backedges*/ true);
+    }
+    did_osr = jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
   }
-  // Assumes caller has already determined that an OSR check is appropriate.
-  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+  shadow_frame->SetCachedHotnessCountdown(osr_countdown);
+  return did_osr;
 }
 
 }  // namespace interpreter
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index bf09666..013bb32 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -1174,7 +1174,6 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern MterpDoPackedSwitch
-    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
@@ -1201,7 +1200,6 @@
      */
     /* op vAA, +BBBBBBBB */
     .extern MterpDoSparseSwitch
-    .extern MterpProfileBranch
     lh      a0, 2(rPC)                  # a0 <- bbbb (lo)
     lh      a1, 4(rPC)                  # a1 <- BBBB (hi)
     srl     a3, rINST, 8                # a3 <- AA
@@ -1396,7 +1394,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
@@ -1423,7 +1420,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
@@ -1450,7 +1446,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
@@ -1477,7 +1472,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
@@ -1504,7 +1498,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
@@ -1531,7 +1524,6 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-    .extern MterpProfileBranch
     ext     a2, rINST, 8, 4             # a2 <- A
     ext     a3, rINST, 12, 4            # a3 <- B
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 4112142..d566799 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -54,6 +54,8 @@
   static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 10000;
   static constexpr size_t kDefaultPriorityThreadWeightRatio = 1000;
   static constexpr size_t kDefaultInvokeTransitionWeightRatio = 500;
+  // How frequently should the interpreter check to see if OSR compilation is ready.
+  static constexpr int16_t kJitRecheckOSRThreshold = 100;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
diff --git a/tools/cpp-define-generator/constant_jit.def b/tools/cpp-define-generator/constant_jit.def
index 5fa5194..82cdbb2 100644
--- a/tools/cpp-define-generator/constant_jit.def
+++ b/tools/cpp-define-generator/constant_jit.def
@@ -25,5 +25,6 @@
 
 DEFINE_JIT_CONSTANT(CHECK_OSR,       int16_t, art::jit::kJitCheckForOSR)
 DEFINE_JIT_CONSTANT(HOTNESS_DISABLE, int16_t, art::jit::kJitHotnessDisabled)
+DEFINE_JIT_CONSTANT(CHECK_OSR_THRESHOLD, int16_t, art::jit::Jit::kJitRecheckOSRThreshold)
 
 #undef DEFINE_JIT_CONSTANT