Prevent overflow for AOT hotness counters

Previous, the addition did not have a check for overflow and might wrap
around since the counter is only 16 bits.

Modified the test to exercise this.

The slowdown from fixing the overflow is 2% average on golem arm32/64.
Overall this brings the slowdown from the counter to ~15% from ~13%.

The benchmarks that regress the most are loopy ones that I would
consider non-representative. Code size increases by 0.6%.

Bug: 139883463
Test: test/run-test --host --64 --prebuild 674-hotness-compiled
Test: test/run-test --host --prebuild 674-hotness-compiled
Test: test/run-test --64 --prebuild 674-hotness-compiled
Test: test/run-test ---prebuild 674-hotness-compiled

Change-Id: Icf0ab2aedbc40ab10c9d952ce0f9c7b5e5feaf15
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a299ece..269b4e9 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1068,8 +1068,10 @@
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     UseScratchRegisterScope temps(masm);
     Register temp = temps.AcquireX();
-    __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-    __ Add(temp, temp, 1);
+    __ Ldrsh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Adds(temp, temp, 1);
+    // Subtract carry if it overflowed.
+    __ Sbc(temp, temp, xzr);
     __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
   }
 
@@ -3175,8 +3177,10 @@
       Register temp1 = temps.AcquireX();
       Register temp2 = temps.AcquireX();
       __ Ldr(temp1, MemOperand(sp, 0));
-      __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
-      __ Add(temp2, temp2, 1);
+      __ Ldrsh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
+      __ Adds(temp2, temp2, 1);
+      // Subtract carry if it overflowed.
+      __ Sbc(temp2, temp2, xzr);
       __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
     }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b3141bf..0ddae52 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2087,8 +2087,12 @@
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
     vixl32::Register temp = temps.Acquire();
-    __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-    __ Add(temp, temp, 1);
+    static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
+    // Load with sign extend to set the high bits for integer overflow check.
+    __ Ldrsh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Adds(temp, temp, 1);
+    // Subtract carry if it overflowed.
+    __ Sbc(temp, temp, 0);
     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
   }
 
@@ -2497,8 +2501,11 @@
       vixl32::Register temp = temps.Acquire();
       __ Push(vixl32::Register(kMethodRegister));
       GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
-      __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-      __ Add(temp, temp, 1);
+      // Load with sign extend to set the high bits for integer overflow check.
+      __ Ldrsh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+      __ Adds(temp, temp, 1);
+      // Subtract carry if it overflowed.
+      __ Sbc(temp, temp, 0);
       __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
       __ Pop(vixl32::Register(kMethodRegister));
     }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4ab398e..797fe32 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1078,8 +1078,15 @@
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
-    __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()),
+    NearLabel overflow;
+    __ cmpw(Address(kMethodRegisterArgument,
+                    ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(ArtMethod::MaxCounter()));
+    __ j(kEqual, &overflow);
+    __ addw(Address(kMethodRegisterArgument,
+                    ArtMethod::HotnessCountOffset().Int32Value()),
             Immediate(1));
+    __ Bind(&overflow);
   }
 
   if (!skip_overflow_check) {
@@ -1385,7 +1392,13 @@
     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
       __ pushl(EAX);
       __ movl(EAX, Address(ESP, kX86WordSize));
-      __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1));
+          NearLabel overflow;
+      __ cmpw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
+              Immediate(ArtMethod::MaxCounter()));
+      __ j(kEqual, &overflow);
+      __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
+              Immediate(1));
+      __ Bind(&overflow);
       __ popl(EAX);
     }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a75c745..2d51f32 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1349,9 +1349,15 @@
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    NearLabel overflow;
+    __ cmpw(Address(CpuRegister(kMethodRegisterArgument),
+                    ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(ArtMethod::MaxCounter()));
+    __ j(kEqual, &overflow);
     __ addw(Address(CpuRegister(kMethodRegisterArgument),
                     ArtMethod::HotnessCountOffset().Int32Value()),
             Immediate(1));
+    __ Bind(&overflow);
   }
 
   if (!skip_overflow_check) {
@@ -1547,8 +1553,13 @@
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
+      NearLabel overflow;
+      __ cmpw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
+              Immediate(ArtMethod::MaxCounter()));
+      __ j(kEqual, &overflow);
       __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
               Immediate(1));
+      __ Bind(&overflow);
     }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 6bbccb8..3aec1c3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -405,7 +405,7 @@
   return CodeItemDebugInfoAccessor(*GetDexFile(), GetCodeItem(), GetDexMethodIndex());
 }
 
-inline void ArtMethod::SetCounter(int16_t hotness_count) {
+inline void ArtMethod::SetCounter(uint16_t hotness_count) {
   DCHECK(!IsAbstract()) << PrettyMethod();
   hotness_count_ = hotness_count;
 }
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 450fe41..d84ea7c 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_ART_METHOD_H_
 
 #include <cstddef>
+#include <limits>
 
 #include <android-base/logging.h>
 #include <jni.h>
@@ -673,10 +674,14 @@
   void CopyFrom(ArtMethod* src, PointerSize image_pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void SetCounter(int16_t hotness_count) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetCounter(uint16_t hotness_count) REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE uint16_t GetCounter() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  ALWAYS_INLINE static constexpr uint16_t MaxCounter() {
+    return std::numeric_limits<decltype(hotness_count_)>::max();
+  }
+
   ALWAYS_INLINE uint32_t GetImtIndex() REQUIRES_SHARED(Locks::mutator_lock_);
 
   void CalculateAndSetImtIndex() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/test/674-hotness-compiled/src/Main.java b/test/674-hotness-compiled/src/Main.java
index 76ec927..5f0d10a 100644
--- a/test/674-hotness-compiled/src/Main.java
+++ b/test/674-hotness-compiled/src/Main.java
@@ -18,8 +18,8 @@
   public static void $noinline$hotnessCount() {
   }
 
-  public static void $noinline$hotnessCountWithLoop() {
-    for (int i = 0; i < 100; i++) {
+  public static void $noinline$hotnessCountWithLoop(int count) {
+    for (int i = 0; i < count; i++) {
       $noinline$hotnessCount();
     }
   }
@@ -35,9 +35,17 @@
       throw new Error("Expected hotness counter to be updated");
     }
 
-    $noinline$hotnessCountWithLoop();
-    if (getHotnessCounter(Main.class, "$noinline$hotnessCountWithLoop") <= counter) {
-      throw new Error("Expected hotness counter of a loop to be greater than without loop");
+    $noinline$hotnessCountWithLoop(1000);
+    int newCounter = getHotnessCounter(Main.class, "$noinline$hotnessCountWithLoop");
+    if (newCounter <= counter) {
+      throw new Error("Expected counter " + newCounter + " to be larger than " + counter);
+    }
+    counter = newCounter;
+
+    $noinline$hotnessCountWithLoop(65500);
+    newCounter = getHotnessCounter(Main.class, "$noinline$hotnessCountWithLoop");
+    if (newCounter <= counter) {
+      throw new Error("Expected counter " + newCounter + " to be larger than " + counter);
     }
   }