Reduce the cost of Jit::AddSamples.

The method is called on every invoke, which makes it expensive.
Keep adding samples on each call, but do the full threshold check
only when the counter crosses a batch boundary.

This reduces the cost of AddSamples from 3.5% to 1% (maps on device).

Test: ./art/test.py -b -r --host --64
Change-Id: Ie90da905d067b5d85a3c494c7c9af81b24dc8135
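
For context, the idea behind the change as a minimal standalone sketch
(illustrative only, not part of this change; the helper name is hypothetical,
and only the batch size mirrors the kJitSamplesBatchSize constant added below):

  #include <cstdint>

  // Mirrors jit::kJitSamplesBatchSize introduced in runtime/jit/jit.h.
  constexpr uint32_t kBatchSize = 32;  // Must be a power of two.

  // Hypothetical helper: the expensive compilation check only needs to run
  // when the counter moves into a new batch, i.e. when its high bits change.
  inline bool CrossesBatchBoundary(uint32_t old_count, uint32_t new_count) {
    // Same as RoundDown(old_count, kBatchSize) != RoundDown(new_count, kBatchSize).
    return (old_count & ~(kBatchSize - 1)) != (new_count & ~(kBatchSize - 1));
  }
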
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index bf84227..1c72421 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -44,7 +44,7 @@
 #include "handle_scope-inl.h"
 #include "interpreter_mterp_impl.h"
 #include "interpreter_switch_impl.h"
-#include "jit/jit.h"
+#include "jit/jit-inl.h"
 #include "mirror/call_site.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
diff --git a/runtime/interpreter/interpreter_switch_impl-inl.h b/runtime/interpreter/interpreter_switch_impl-inl.h
index 94cb3de..aec2aa2 100644
--- a/runtime/interpreter/interpreter_switch_impl-inl.h
+++ b/runtime/interpreter/interpreter_switch_impl-inl.h
@@ -26,7 +26,7 @@
 #include "dex/dex_instruction_list.h"
 #include "experimental_flags.h"
 #include "interpreter_common.h"
-#include "jit/jit.h"
+#include "jit/jit-inl.h"
 #include "jvalue-inl.h"
 #include "mirror/string-alloc-inl.h"
 #include "nth_caller_visitor.h"
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 878d921..d6e42f3 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -879,9 +879,11 @@
   int32_t countdown_value = jit::kJitHotnessDisabled;
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
-    int32_t warm_threshold = jit->WarmMethodThreshold();
-    int32_t hot_threshold = jit->HotMethodThreshold();
-    int32_t osr_threshold = jit->OSRMethodThreshold();
+    // We need to add the batch size to ensure the threshold gets passed even after rounding.
+    constexpr int32_t kBatchSize = jit::kJitSamplesBatchSize;
+    int32_t warm_threshold = static_cast<int32_t>(jit->WarmMethodThreshold()) + kBatchSize;
+    int32_t hot_threshold = static_cast<int32_t>(jit->HotMethodThreshold()) + kBatchSize;
+    int32_t osr_threshold = static_cast<int32_t>(jit->OSRMethodThreshold()) + kBatchSize;
     if (hotness_count < warm_threshold) {
       countdown_value = warm_threshold - hotness_count;
     } else if (hotness_count < hot_threshold) {
diff --git a/runtime/jit/jit-inl.h b/runtime/jit/jit-inl.h
new file mode 100644
index 0000000..80324ad
--- /dev/null
+++ b/runtime/jit/jit-inl.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_JIT_INL_H_
+#define ART_RUNTIME_JIT_JIT_INL_H_
+
+#include "jit/jit.h"
+
+#include "art_method.h"
+#include "base/bit_utils.h"
+#include "thread.h"
+#include "runtime-inl.h"
+
+namespace art {
+namespace jit {
+
+inline bool Jit::ShouldUsePriorityThreadWeight(Thread* self) {
+  return self->IsJitSensitiveThread() && Runtime::Current()->InJankPerceptibleProcessState();
+}
+
+inline void Jit::AddSamples(Thread* self,
+                            ArtMethod* method,
+                            uint16_t samples,
+                            bool with_backedges) {
+  if (Jit::ShouldUsePriorityThreadWeight(self)) {
+    samples *= PriorityThreadWeight();
+  }
+  uint32_t old_count = method->GetCounter();
+  uint32_t new_count = old_count + samples;
+
+  // The full check is fairly expensive so we just add to hotness most of the time,
+  // and we do the full check only when some of the higher bits of the count change.
+  // NB: The method needs to see the transitions of the counter past the thresholds.
+  uint32_t old_batch = RoundDown(old_count, kJitSamplesBatchSize);  // Clear lower bits.
+  uint32_t new_batch = RoundDown(new_count, kJitSamplesBatchSize);  // Clear lower bits.
+  if (UNLIKELY(old_batch == 0)) {
+    // For low sample counts, we check every time (which is important for tests).
+    if (!MaybeCompileMethod(self, method, old_count, new_count, with_backedges)) {
+      // Tests may check that the counter is 0 for methods that we never compile.
+      return;  // Ignore the samples for now and retry later.
+    }
+  } else if (UNLIKELY(old_batch != new_batch)) {
+    // For high sample counts, we check only when we move past the batch boundary.
+    if (!MaybeCompileMethod(self, method, old_batch, new_batch, with_backedges)) {
+      // OSR compilation will ignore the samples if they don't have backedges.
+      return;  // Ignore the samples for now and retry later.
+    }
+  }
+
+  method->SetCounter(new_count);
+}
+
+}  // namespace jit
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_JIT_INL_H_
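
A worked example of the batching above (illustrative counts only; the real
thresholds are runtime options): with kJitSamplesBatchSize = 32, a counter
going from 95 to 105 gives RoundDown(95, 32) = 64 and RoundDown(105, 32) = 96,
so MaybeCompileMethod sees the pair (64, 96). A hypothetical threshold of 100
is not detected at this crossing, but it is at the next one, when the rounded
counts become (96, 128), i.e. within roughly one batch of the real threshold.
This is why the mterp hunk above adds the batch size to each threshold when
computing the hotness countdown.
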
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 4a3ef07..2fc418b 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -28,6 +28,7 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
+#include "jit-inl.h"
 #include "jit_code_cache.h"
 #include "jni/java_vm_ext.h"
 #include "mirror/method_handle_impl.h"
@@ -149,10 +150,6 @@
   return jit_options;
 }
 
-bool Jit::ShouldUsePriorityThreadWeight(Thread* self) {
-  return self->IsJitSensitiveThread() && Runtime::Current()->InJankPerceptibleProcessState();
-}
-
 void Jit::DumpInfo(std::ostream& os) {
   code_cache_->Dump(os);
   cumulative_timings_.Dump(os);
@@ -628,21 +625,25 @@
   return false;
 }
 
-void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count, bool with_backedges) {
+bool Jit::MaybeCompileMethod(Thread* self,
+                             ArtMethod* method,
+                             uint32_t old_count,
+                             uint32_t new_count,
+                             bool with_backedges) {
   if (thread_pool_ == nullptr) {
     // Should only see this when shutting down, starting up, or in zygote, which doesn't
     // have a thread pool.
     DCHECK(Runtime::Current()->IsShuttingDown(self) ||
            !Runtime::Current()->IsFinishedStarting() ||
            Runtime::Current()->IsZygote());
-    return;
+    return false;
   }
   if (IgnoreSamplesForMethod(method)) {
-    return;
+    return false;
   }
   if (HotMethodThreshold() == 0) {
     // Tests might request JIT on first use (compiled synchronously in the interpreter).
-    return;
+    return false;
   }
   DCHECK(thread_pool_ != nullptr);
   DCHECK_GT(WarmMethodThreshold(), 0);
@@ -651,15 +652,9 @@
   DCHECK_GE(PriorityThreadWeight(), 1);
   DCHECK_LE(PriorityThreadWeight(), HotMethodThreshold());
 
-  uint16_t starting_count = method->GetCounter();
-  if (Jit::ShouldUsePriorityThreadWeight(self)) {
-    count *= PriorityThreadWeight();
-  }
-  uint32_t new_count = starting_count + count;
-  // Note: Native method have no "warm" state or profiling info.
-  if (LIKELY(!method->IsNative()) && starting_count < WarmMethodThreshold()) {
-    if ((new_count >= WarmMethodThreshold()) &&
-        (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
+  if (old_count < WarmMethodThreshold() && new_count >= WarmMethodThreshold()) {
+    // Note: Native methods have no "warm" state or profiling info.
+    if (!method->IsNative() && method->GetProfilingInfo(kRuntimePointerSize) == nullptr) {
       bool success = ProfilingInfo::Create(self, method, /* retry_allocation= */ false);
       if (success) {
         VLOG(jit) << "Start profiling " << method->PrettyMethod();
@@ -669,7 +664,7 @@
         // Calling ProfilingInfo::Create might put us in a suspended state, which could
         // lead to the thread pool being deleted when we are shutting down.
         DCHECK(Runtime::Current()->IsShuttingDown(self));
-        return;
+        return false;
       }
 
       if (!success) {
@@ -678,31 +673,26 @@
         thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
       }
     }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, static_cast<uint32_t>(HotMethodThreshold() - 1));
-  } else if (UseJitCompilation()) {
-    if (starting_count < HotMethodThreshold()) {
-      if ((new_count >= HotMethodThreshold()) &&
-          !code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+  }
+  if (UseJitCompilation()) {
+    if (old_count < HotMethodThreshold() && new_count >= HotMethodThreshold()) {
+      if (!code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
         DCHECK(thread_pool_ != nullptr);
         thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
       }
-      // Avoid jumping more than one state at a time.
-      new_count = std::min(new_count, static_cast<uint32_t>(OSRMethodThreshold() - 1));
-    } else if (starting_count < OSRMethodThreshold()) {
+    }
+    if (old_count < OSRMethodThreshold() && new_count >= OSRMethodThreshold()) {
       if (!with_backedges) {
-        // If the samples don't contain any back edge, we don't increment the hotness.
-        return;
+        return false;
       }
       DCHECK(!method->IsNative());  // No back edges reported for native methods.
-      if ((new_count >= OSRMethodThreshold()) &&  !code_cache_->IsOsrCompiled(method)) {
+      if (!code_cache_->IsOsrCompiled(method)) {
         DCHECK(thread_pool_ != nullptr);
         thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
       }
     }
   }
-  // Update hotness counter
-  method->SetCounter(new_count);
+  return true;
 }
 
 class ScopedSetRuntimeThread {
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e12b032..2027719 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -47,6 +47,7 @@
 // At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
 // See android/os/Process.java.
 static constexpr int kJitPoolThreadPthreadDefaultPriority = 9;
+static constexpr uint32_t kJitSamplesBatchSize = 32;  // Must be power of 2.
 
 class JitOptions {
  public:
@@ -217,7 +218,10 @@
   void MethodEntered(Thread* thread, ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
+  ALWAYS_INLINE void AddSamples(Thread* self,
+                                ArtMethod* method,
+                                uint16_t samples,
+                                bool with_backedges)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(ObjPtr<mirror::Object> this_object,
@@ -291,6 +295,15 @@
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
+  // Compile the method if the number of samples passes a threshold.
+  // Returns false if we cannot compile now; the caller should not update the counter and retry later.
+  bool MaybeCompileMethod(Thread* self,
+                          ArtMethod* method,
+                          uint32_t old_count,
+                          uint32_t new_count,
+                          bool with_backedges)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static bool BindCompilerMethods(std::string* error_msg);
 
   // JIT compiler