Add compiler option for counting hotness in compiled code.

This should eventually make it easier to profile the boot classpath
and the system server.

bug: 30934496
Test: 674-hotness-compiled

Change-Id: I0f63c644527b74f6ef2649f481c2a1c731bb9f21
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 1780b1d..2d82d79 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -60,6 +60,7 @@
       dump_cfg_append_(false),
       force_determinism_(false),
       deduplicate_code_(true),
+      count_hotness_in_compiled_code_(false),
       register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault),
       passes_to_run_(nullptr) {
 }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 3f66029..18b0913 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -274,6 +274,10 @@
     return dump_stats_;
   }
 
+  bool CountHotnessInCompiledCode() const {
+    return count_hotness_in_compiled_code_;
+  }
+
  private:
   bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -336,6 +340,10 @@
   // Whether code should be deduplicated.
   bool deduplicate_code_;
 
+  // Whether compiled code should increment the hotness count of ArtMethod. Note that the increments
+  // won't be atomic for performance reasons, so we accept races, just like in the interpreter.
+  bool count_hotness_in_compiled_code_;
+
   RegisterAllocator::Strategy register_allocation_strategy_;
 
   // If not null, specifies optimization passes which will be run instead of defaults.
diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h
index f97ab08..3b18db0 100644
--- a/compiler/driver/compiler_options_map-inl.h
+++ b/compiler/driver/compiler_options_map-inl.h
@@ -77,6 +77,9 @@
   }
   map.AssignIfExists(Base::VerboseMethods, &options->verbose_methods_);
   options->deduplicate_code_ = map.GetOrDefault(Base::DeduplicateCode);
+  if (map.Exists(Base::CountHotnessInCompiledCode)) {
+    options->count_hotness_in_compiled_code_ = true;
+  }
 
   if (map.Exists(Base::DumpTimings)) {
     options->dump_timings_ = true;
@@ -137,6 +140,9 @@
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(Map::DeduplicateCode)
 
+      .Define({"--count-hotness-in-compiled-code"})
+          .IntoKey(Map::CountHotnessInCompiledCode)
+
       .Define({"--dump-timings"})
           .IntoKey(Map::DumpTimings)
 
diff --git a/compiler/driver/compiler_options_map.def b/compiler/driver/compiler_options_map.def
index 2c56fd7..acddae7 100644
--- a/compiler/driver/compiler_options_map.def
+++ b/compiler/driver/compiler_options_map.def
@@ -58,6 +58,7 @@
 COMPILER_OPTIONS_KEY (std::string,                 RegisterAllocationStrategy)
 COMPILER_OPTIONS_KEY (ParseStringList<','>,        VerboseMethods)
 COMPILER_OPTIONS_KEY (bool,                        DeduplicateCode,        true)
+COMPILER_OPTIONS_KEY (Unit,                        CountHotnessInCompiledCode)
 COMPILER_OPTIONS_KEY (Unit,                        DumpTimings)
 COMPILER_OPTIONS_KEY (Unit,                        DumpStats)
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13bbffa..1380596 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1488,6 +1488,14 @@
   MacroAssembler* masm = GetVIXLAssembler();
   __ Bind(&frame_entry_label_);
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    UseScratchRegisterScope temps(masm);
+    Register temp = temps.AcquireX();
+    __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Add(temp, temp, 1);
+    __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+  }
+
   bool do_overflow_check =
       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
   if (do_overflow_check) {
@@ -3501,6 +3509,15 @@
   HLoopInformation* info = block->GetLoopInformation();
 
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register temp1 = temps.AcquireX();
+      Register temp2 = temps.AcquireX();
+      __ Ldr(temp1, MemOperand(sp, 0));
+      __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
+      __ Add(temp2, temp2, 1);
+      __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
+    }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 577fe00..18e7d1c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2485,6 +2485,14 @@
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   __ Bind(&frame_entry_label_);
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Register temp = temps.Acquire();
+    __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Add(temp, temp, 1);
+    __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+  }
+
   if (HasEmptyFrame()) {
     return;
   }
@@ -2786,6 +2794,16 @@
   HLoopInformation* info = block->GetLoopInformation();
 
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register temp = temps.Acquire();
+      __ Push(vixl32::Register(kMethodRegister));
+      GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
+      __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+      __ Add(temp, temp, 1);
+      __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+      __ Pop(vixl32::Register(kMethodRegister));
+    }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5c8e46e..51fb4da 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1276,6 +1276,10 @@
 void CodeGeneratorMIPS::GenerateFrameEntry() {
   __ Bind(&frame_entry_label_);
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    LOG(WARNING) << "Unimplemented hotness update in mips backend";
+  }
+
   bool do_overflow_check =
       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod();
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bcfe051..480b917 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1079,6 +1079,10 @@
 void CodeGeneratorMIPS64::GenerateFrameEntry() {
   __ Bind(&frame_entry_label_);
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    LOG(WARNING) << "Unimplemented hotness update in mips64 backend";
+  }
+
   bool do_overflow_check =
       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod();
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cbe9e0a..c52c7ff 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1061,6 +1061,11 @@
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(1));
+  }
+
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
@@ -1357,6 +1362,12 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+      __ pushl(EAX);
+      __ movl(EAX, Address(ESP, kX86WordSize));
+      __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1));
+      __ popl(EAX);
+    }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 510eec4..ee5918d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1268,6 +1268,12 @@
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
+  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    __ addw(Address(CpuRegister(kMethodRegisterArgument),
+                    ArtMethod::HotnessCountOffset().Int32Value()),
+            Immediate(1));
+  }
+
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
@@ -1459,6 +1465,11 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
+      __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
+              Immediate(1));
+    }
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }