Revert^2 "Add support for calling entry / exit hooks directly from JIT code""

This reverts commit 72be14ed06b76cd0e83392145cec9025ff43d174.

Reason for revert: A reland of
commit 2d4feeb67912d64b9e980e6687794826a5c22f9d with a fix for no-image
tests

Change-Id: I79f719f0d4d9b903db301a1636fde5689da35a29
diff --git a/CleanSpec.mk b/CleanSpec.mk
index 007c0cc..48974f3 100644
--- a/CleanSpec.mk
+++ b/CleanSpec.mk
@@ -117,6 +117,7 @@
 
 # Remove all dex2oat artifacts (workaround for broken dependencies).
 $(call add-clean-step, find $(OUT_DIR) -name "*.oat" -o -name "*.odex" -o -name "*.art" -o -name '*.vdex' | xargs rm -f)
+$(call add-clean-step, find $(OUT_DIR) -name "*.oat" -o -name "*.odex" -o -name "*.art" -o -name '*.vdex' | xargs rm -f)
 
 # ************************************************
 # NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 74efc9e..d455614 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -822,6 +822,31 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
 };
 
+class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
+      : SlowPathCodeARM64(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    LocationSummary* locations = instruction_->GetLocations();
+    QuickEntrypointEnum entry_point =
+        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "MethodEntryExitHooksSlowPath";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
@@ -1113,6 +1138,47 @@
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
 }
 
+void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator())
+      LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+  DataType::Type return_type = method_hook->InputAt(0)->GetType();
+  locations->SetInAt(0, ARM64ReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireX();
+  Register value = temps.AcquireW();
+
+  SlowPathCodeARM64* slow_path =
+      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
+  codegen_->AddSlowPath(slow_path);
+
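+  // Check whether the instrumentation currently needs entry / exit hooks; if the
+  // flag is non-zero, take the slow path, which calls the runtime hook.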
+  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+  int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+  __ Mov(temp, address + offset);
+  __ Ldrh(value, MemOperand(temp, 0));
+  __ Cbnz(value, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
+void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
+  new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
 void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
   MacroAssembler* masm = GetVIXLAssembler();
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d4546e5..750151a 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -388,6 +388,7 @@
   void GenerateIntRemForConstDenom(HRem *instruction);
   void GenerateIntRemForPower2Denom(HRem *instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenerateMethodEntryExitHook(HInstruction* instruction);
 
   // Helpers to set up locations for vector memory operations. Returns the memory operand and,
   // if used, sets the output parameter scratch to a temporary register used in this operand,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 700202b..bf0c77d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -971,6 +971,31 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
 };
 
+class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit MethodEntryExitHooksSlowPathARMVIXL(HInstruction* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    LocationSummary* locations = instruction_->GetLocations();
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    QuickEntrypointEnum entry_point =
+        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "MethodEntryExitHooksSlowPath";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
+};
+
 inline vixl32::Condition ARMCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return eq;
@@ -2111,6 +2136,44 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator())
+      LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+
+  SlowPathCodeARMVIXL* slow_path =
+      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+  uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
+  __ Mov(temp, address + offset);
+  __ Ldrh(temp, MemOperand(temp, 0));
+  __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
+void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
+  new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
 void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index b797c30..aa40755 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -431,6 +431,7 @@
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenerateMethodEntryExitHook(HInstruction* instruction);
 
   vixl::aarch32::MemOperand VecAddress(
       HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c49b08b..a04b412 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -942,6 +942,30 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
 };
 
+class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
+ public:
+  explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    QuickEntrypointEnum entry_point =
+        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "MethodEntryExitHooksSlowPath";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
+};
+
 #undef __
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
@@ -1097,6 +1121,70 @@
   return dwarf::Reg::X86Core(static_cast<int>(reg));
 }
 
+void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
+  switch (ret->InputAt(0)->GetType()) {
+    case DataType::Type::kReference:
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
+    case DataType::Type::kInt32:
+      locations->SetInAt(0, Location::RegisterLocation(EAX));
+      break;
+
+    case DataType::Type::kInt64:
+      locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
+      break;
+
+    case DataType::Type::kFloat32:
+    case DataType::Type::kFloat64:
+      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
+      break;
+
+    case DataType::Type::kVoid:
+      locations->SetInAt(0, Location::NoLocation());
+      break;
+
+    default:
+      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
+  }
+}
+
+void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator())
+      LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+  SetInForReturnValue(method_hook, locations);
+}
+
+void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
+  SlowPathCode* slow_path =
+      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+  int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+  __ cmpw(Address::Absolute(address + offset), Immediate(0));
+  __ j(kNotEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
+void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
+  new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     Register reg = EAX;
@@ -2408,31 +2496,7 @@
 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
-  switch (ret->InputAt(0)->GetType()) {
-    case DataType::Type::kReference:
-    case DataType::Type::kBool:
-    case DataType::Type::kUint8:
-    case DataType::Type::kInt8:
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-      locations->SetInAt(0, Location::RegisterLocation(EAX));
-      break;
-
-    case DataType::Type::kInt64:
-      locations->SetInAt(
-          0, Location::RegisterPairLocation(EAX, EDX));
-      break;
-
-    case DataType::Type::kFloat32:
-    case DataType::Type::kFloat64:
-      locations->SetInAt(
-          0, Location::FpuRegisterLocation(XMM0));
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
-  }
+  SetInForReturnValue(ret, locations);
 }
 
 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 94f010e..75c5ceb 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -344,6 +344,8 @@
   bool CpuHasAvxFeatureFlag();
   bool CpuHasAvx2FeatureFlag();
 
+  void GenerateMethodEntryExitHook(HInstruction* instruction);
+
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dae2ae2..4ec2dd7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -965,6 +965,31 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
 };
 
+class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
+      : SlowPathCode(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    QuickEntrypointEnum entry_point =
+        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "MethodEntryExitHooksSlowPath";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
+};
+
 #undef __
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
@@ -1494,6 +1519,68 @@
   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
 }
 
+void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
+  new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
+  SlowPathCode* slow_path =
+      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+  int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+  __ movq(CpuRegister(TMP), Immediate(address + offset));
+  __ cmpw(Address(CpuRegister(TMP), 0), Immediate(0));
+  __ j(kNotEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
+void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
+  switch (instr->InputAt(0)->GetType()) {
+    case DataType::Type::kReference:
+    case DataType::Type::kBool:
+    case DataType::Type::kUint8:
+    case DataType::Type::kInt8:
+    case DataType::Type::kUint16:
+    case DataType::Type::kInt16:
+    case DataType::Type::kInt32:
+    case DataType::Type::kInt64:
+      locations->SetInAt(0, Location::RegisterLocation(RAX));
+      break;
+
+    case DataType::Type::kFloat32:
+    case DataType::Type::kFloat64:
+      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
+      break;
+
+    case DataType::Type::kVoid:
+      locations->SetInAt(0, Location::NoLocation());
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator())
+      LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+  SetInForReturnValue(method_hook, locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
+  DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+  DCHECK(codegen_->RequiresCurrentMethod());
+  GenerateMethodEntryExitHook(instruction);
+}
+
 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     NearLabel overflow;
@@ -2542,26 +2629,7 @@
 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
-  switch (ret->InputAt(0)->GetType()) {
-    case DataType::Type::kReference:
-    case DataType::Type::kBool:
-    case DataType::Type::kUint8:
-    case DataType::Type::kInt8:
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt16:
-    case DataType::Type::kInt32:
-    case DataType::Type::kInt64:
-      locations->SetInAt(0, Location::RegisterLocation(RAX));
-      break;
-
-    case DataType::Type::kFloat32:
-    case DataType::Type::kFloat64:
-      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
-  }
+  SetInForReturnValue(ret, locations);
 }
 
 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3e601bb..1115c83 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -276,6 +276,7 @@
   void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
   void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
   void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+  void GenerateMethodEntryExitHook(HInstruction* instruction);
 
   // Generate a heap reference load using one register `out`:
   //
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0e4f9ef..17957d8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -812,6 +812,11 @@
                            HBasicBlock* bb_cursor) {
   HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetAllocator())
       HShouldDeoptimizeFlag(graph_->GetAllocator(), dex_pc);
+  // ShouldDeoptimizeFlag is used to trigger a deoptimization either because of a CHA
+  // invalidation or for debugging reasons. It is OK to just check for a non-zero value
+  // here instead of the specific CHA bit. When a debugging deopt is requested we
+  // deoptimize before executing any code, so we should never see that case here.
   HInstruction* compare = new (graph_->GetAllocator()) HNotEqual(
       deopt_flag, graph_->GetIntConstant(0, dex_pc));
   HInstruction* deopt = new (graph_->GetAllocator()) HDeoptimize(
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 390a2bb..ed760f1 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -372,6 +372,9 @@
     if (current_block_->IsEntryBlock()) {
       InitializeParameters();
       AppendInstruction(new (allocator_) HSuspendCheck(0u));
+      if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+        AppendInstruction(new (allocator_) HMethodEntryHook(0u));
+      }
       AppendInstruction(new (allocator_) HGoto(0u));
       continue;
     } else if (current_block_->IsExitBlock()) {
@@ -822,10 +825,18 @@
           compilation_stats_,
           MethodCompilationStat::kConstructorFenceGeneratedFinal);
     }
+    if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+      // The return value is not used for void functions. We pass a NullConstant
+      // to avoid special-casing void returns when generating code.
+      AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), dex_pc));
+    }
     AppendInstruction(new (allocator_) HReturnVoid(dex_pc));
   } else {
     DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_));
     HInstruction* value = LoadLocal(instruction.VRegA(), type);
+    if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+      AppendInstruction(new (allocator_) HMethodExitHook(value, dex_pc));
+    }
     AppendInstruction(new (allocator_) HReturn(value, dex_pc));
   }
   current_block_ = nullptr;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 17080f0..2478693 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2913,7 +2913,10 @@
     } else if (current->IsCurrentMethod()) {
       replacement = outer_graph->GetCurrentMethod();
     } else {
-      DCHECK(current->IsGoto() || current->IsSuspendCheck());
+      // It is OK to ignore MethodEntryHook for inlined functions: in debug mode
+      // we don't inline, and in release mode method tracing is best effort, so
+      // dropping the hook is acceptable.
+      DCHECK(current->IsGoto() || current->IsSuspendCheck() || current->IsMethodEntryHook());
       entry_block_->RemoveInstruction(current);
     }
     if (replacement != nullptr) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6ef29bf..16e26dc 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -21,6 +21,7 @@
 #include <array>
 #include <type_traits>
 
+#include "art_method.h"
 #include "base/arena_allocator.h"
 #include "base/arena_bit_vector.h"
 #include "base/arena_containers.h"
@@ -32,7 +33,6 @@
 #include "base/quasi_atomic.h"
 #include "base/stl_util.h"
 #include "base/transform_array_ref.h"
-#include "art_method.h"
 #include "block_namer.h"
 #include "class_root.h"
 #include "compilation_kind.h"
@@ -680,7 +680,7 @@
   }
 
   bool HasShouldDeoptimizeFlag() const {
-    return number_of_cha_guards_ != 0;
+    return number_of_cha_guards_ != 0 || debuggable_;
   }
 
   bool HasTryCatch() const { return has_try_catch_; }
@@ -1530,6 +1530,8 @@
   M(LongConstant, Constant)                                             \
   M(Max, Instruction)                                                   \
   M(MemoryBarrier, Instruction)                                         \
+  M(MethodEntryHook, Instruction)                                       \
+  M(MethodExitHook, Instruction)                                        \
   M(Min, BinaryOperation)                                               \
   M(MonitorOperation, Instruction)                                      \
   M(Mul, BinaryOperation)                                               \
@@ -2994,6 +2996,38 @@
   friend class SsaBuilder;
 };
 
+class HMethodEntryHook : public HExpression<0> {
+ public:
+  explicit HMethodEntryHook(uint32_t dex_pc)
+      : HExpression(kMethodEntryHook, SideEffects::All(), dex_pc) {}
+
+  bool NeedsEnvironment() const override {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(MethodEntryHook);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(MethodEntryHook);
+};
+
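+// Instruction injected before each return of a method JIT-compiled in debuggable
+// mode. Its input is the return value (the null constant for void methods). It calls
+// the method exit hook runtime entrypoint when the instrumentation requires
+// entry / exit events.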
+class HMethodExitHook : public HExpression<1> {
+ public:
+  HMethodExitHook(HInstruction* value, uint32_t dex_pc)
+      : HExpression(kMethodExitHook, SideEffects::All(), dex_pc) {
+    SetRawInputAt(0, value);
+  }
+
+  bool NeedsEnvironment() const override {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(MethodExitHook);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(MethodExitHook);
+};
+
 // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
 // instruction that branches to the exit block.
 class HReturnVoid final : public HExpression<0> {
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 8663d8b..7bcff2b 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -505,7 +505,7 @@
   EXPECT_EQ(64U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(167 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/openjdkjvmti/deopt_manager.cc b/openjdkjvmti/deopt_manager.cc
index bf1b4f0..cf28a71 100644
--- a/openjdkjvmti/deopt_manager.cc
+++ b/openjdkjvmti/deopt_manager.cc
@@ -492,7 +492,12 @@
                                          art::gc::GcCause::kGcCauseDebugger,
                                          art::gc::CollectorType::kCollectorTypeDebugger);
   art::ScopedSuspendAll ssa("Instrument thread stack");
-  art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(target);
+  // Prepare the stack so methods can be deoptimized as and when required.
+  // This by itself doesn't cause any methods to deoptimize but enables
+  // deoptimization on demand.
+  art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(
+      target,
+      /* deopt_all_frames= */ false);
 }
 
 extern DeoptManager* gDeoptManager;
diff --git a/openjdkjvmti/ti_heap.cc b/openjdkjvmti/ti_heap.cc
index 27fed28..bd9d2dd 100644
--- a/openjdkjvmti/ti_heap.cc
+++ b/openjdkjvmti/ti_heap.cc
@@ -1780,7 +1780,7 @@
       // already have.
       // TODO We technically only need to do this if the frames are not already being interpreted.
       // The cost for doing an extra stack walk is unlikely to be worth it though.
-      instr->InstrumentThreadStack(t);
+      instr->InstrumentThreadStack(t, /* deopt_all_frames= */ true);
     }
   }
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f5f1274..5ef1d3e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2525,3 +2525,36 @@
     // artCompileOptimized doesn't allow thread suspension.
     blx lr
 END art_quick_compile_optimized
+
+// On entry, method is at the bottom of the stack.
+ENTRY art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME r0
+    ldr r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod*
+    mov r1, rSELF                            @ pass Thread::Current
+    bl  artMethodEntryHook                   @ (ArtMethod*, Thread*)
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    blx lr
+END art_quick_method_entry_hook
+
+ENTRY art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME r2
+
+    add r3, sp, #8                            @ store fpr_res pointer, in kSaveEverything frame
+    add r2, sp, #136                          @ store gpr_res pointer, in kSaveEverything frame
+    ldr r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod*
+    mov r0, rSELF                             @ pass Thread::Current
+    blx artMethodExitHook                     @ (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    .cfi_remember_state
+    cbnz r0, .Ldo_deliver_instrumentation_exception_exit @ Deliver exception
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    blx lr
+.Ldo_deliver_instrumentation_exception_exit:
+    .cfi_restore_state
+    .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END art_quick_method_exit_hook
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 022a0e4..e5dbeda 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2630,3 +2630,40 @@
     // artCompileOptimized doesn't allow thread suspension.
     ret
 END art_quick_compile_optimized
+
+    .extern artMethodEntryHook
+ENTRY art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod*
+    mov x1, xSELF                             // pass Thread::Current
+    bl  artMethodEntryHook                    // (ArtMethod*, Thread*)
+
+    RESTORE_SAVE_EVERYTHING_FRAME             // Note: will restore xSELF
+    REFRESH_MARKING_REGISTER
+    ret
+END art_quick_method_entry_hook
+
+    .extern artMethodExitHook
+ENTRY art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    add x3, sp, #16                           // floating-point result ptr in kSaveEverything frame
+    add x2, sp, #272                          // integer result ptr in kSaveEverything frame
+    ldr x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // ArtMethod*
+    mov x0, xSELF                             // Thread::Current
+    bl  artMethodExitHook                     // (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    .cfi_remember_state
+    cbnz x0, .Ldo_deliver_instrumentation_exception_exit // Handle exception
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    REFRESH_MARKING_REGISTER
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    .cfi_restore_state
+    .cfi_def_cfa sp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END art_quick_method_exit_hook
+
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index cda98d2..2f6af4f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2381,3 +2381,62 @@
     RESTORE_SAVE_EVERYTHING_FRAME
     ret
 END_FUNCTION art_quick_compile_optimized
+
+DEFINE_FUNCTION art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME edx
+    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod
+    subl LITERAL(8), %esp
+    CFI_ADJUST_CFA_OFFSET(8)
+
+    pushl %fs:THREAD_SELF_OFFSET    // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    pushl %eax                      // Pass Method*.
+    CFI_ADJUST_CFA_OFFSET(4)
+
+    call SYMBOL(artMethodEntryHook) // (Method*, Thread*)
+
+    addl LITERAL(16), %esp          // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
+
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+END_FUNCTION art_quick_method_entry_hook
+
+DEFINE_FUNCTION art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME ebx
+
+    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %ebx // Remember ArtMethod*
+    subl LITERAL(8), %esp                      // Align stack.
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH_ARG edx                   // Save gpr return value. edx and eax need to be together
+                                   // which isn't the case in kSaveEverything frame.
+    PUSH_ARG eax
+    movl %esp, %edx                // Get pointer to gpr_result
+    leal 32(%esp), %eax            // Get pointer to fpr_result, in kSaveEverything frame
+    PUSH_ARG eax                   // Pass fpr_result
+    PUSH_ARG edx                   // Pass gpr_result
+    PUSH_ARG ebx                   // Pass ArtMethod*
+    pushl %fs:THREAD_SELF_OFFSET   // Pass Thread::Current.
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod*, gpr_result*, fpr_result*)
+
+    // Return result could have been changed if it's a reference.
+    movl 16(%esp), %ecx
+    movl %ecx, (80+32)(%esp)
+    addl LITERAL(32), %esp         // Pop arguments and gpr_result.
+    CFI_ADJUST_CFA_OFFSET(-32)
+
+    cmpl LITERAL(1), %eax          // Check if we returned error.
+    CFI_REMEMBER_STATE
+    je .Ldo_deliver_instrumentation_exception_exit
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    CFI_RESTORE_STATE_AND_DEF_CFA esp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_FUNCTION art_quick_method_exit_hook
+
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8c21384..136198f 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2208,3 +2208,40 @@
     RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
     ret
 END_FUNCTION art_quick_compile_optimized
+
+// On entry, method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_method_entry_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
+    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
+
+    call SYMBOL(artMethodEntryHook)              // (ArtMethod*, Thread*)
+
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+END_FUNCTION art_quick_method_entry_hook
+
+// On entry, the ArtMethod* is at the bottom of the stack, and r8 holds the
+// should_deopt_frame value.
+DEFINE_FUNCTION art_quick_method_exit_hook
+    SETUP_SAVE_EVERYTHING_FRAME
+
+    leaq 16(%rsp), %rcx                         // floating-point result pointer in kSaveEverything
+                                                // frame
+    leaq 144(%rsp), %rdx                        // integer result pointer in kSaveEverything frame
+    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod
+    movq %gs:THREAD_SELF_OFFSET, %rdi           // Thread::Current
+    call SYMBOL(artMethodExitHook)              // (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+
+    cmpq LITERAL(1), %rax
+    CFI_REMEMBER_STATE
+    je .Ldo_deliver_instrumentation_exception_exit
+
+    // Normal return.
+    RESTORE_SAVE_EVERYTHING_FRAME
+    ret
+.Ldo_deliver_instrumentation_exception_exit:
+    CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_FUNCTION art_quick_method_exit_hook
diff --git a/runtime/cha.cc b/runtime/cha.cc
index c345af8..392b35c 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -219,27 +219,12 @@
     }
 
     // The compiled code on stack is not valid anymore. Need to deoptimize.
-    SetShouldDeoptimizeFlag();
+    SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kCHA);
 
     return true;
   }
 
  private:
-  void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
-    QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
-    size_t frame_size = frame_info.FrameSizeInBytes();
-    uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
-    size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
-        GetBytesPerGprSpillLocation(kRuntimeISA);
-    size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
-        GetBytesPerFprSpillLocation(kRuntimeISA);
-    size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
-    uint8_t* should_deoptimize_addr = sp + offset;
-    // Set deoptimization flag to 1.
-    DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
-    *should_deoptimize_addr = 1;
-  }
-
   // Set of method headers for compiled code that should be deoptimized.
   const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
 
diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h
index 5be6f3d..c2e6a65 100644
--- a/runtime/deoptimization_kind.h
+++ b/runtime/deoptimization_kind.h
@@ -29,6 +29,7 @@
   kLoopNullBCE,
   kBlockBCE,
   kCHA,
+  kDebugging,
   kFullFrame,
   kLast = kFullFrame
 };
@@ -42,6 +43,7 @@
     case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null";
     case DeoptimizationKind::kBlockBCE: return "block bounds check elimination";
     case DeoptimizationKind::kCHA: return "class hierarchy analysis";
+    case DeoptimizationKind::kDebugging: return "deopt requested for debug support";
     case DeoptimizationKind::kFullFrame: return "full frame";
   }
   LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind);
@@ -50,6 +52,15 @@
 
 std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind);
 
+// We use a deoptimization stack slot to record whether a deoptimization is required
+// for methods that are already on the stack. The value in the slot specifies the
+// reason we need to deoptimize.
+enum class DeoptimizeFlagValue : uint8_t {
+  kCHA = 0b01,
+  kDebug = 0b10,
+  kAll = kCHA | kDebug
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_DEOPTIMIZATION_KIND_H_
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 9b1bd26..9f1766d 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -129,6 +129,10 @@
   qpoints->pUpdateInlineCache = art_quick_update_inline_cache;
   qpoints->pCompileOptimized = art_quick_compile_optimized;
 
+  // Tracing hooks
+  qpoints->pMethodEntryHook = art_quick_method_entry_hook;
+  qpoints->pMethodExitHook = art_quick_method_exit_hook;
+
   bool should_report = false;
   PaletteShouldReportJniInvocations(&should_report);
   if (should_report) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 4c451c6..a77e849 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -206,6 +206,8 @@
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
 \
+  V(MethodEntryHook, void, ArtMethod*, Thread*) \
+  V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 0b58c36..b29da65 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -60,6 +60,9 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self);
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 // Visits the arguments as saved to the stack by a CalleeSaveType::kRefAndArgs callee save frame.
 class QuickArgumentVisitor {
   // Number of bytes for each out register in the caller method's frame.
@@ -2586,4 +2589,71 @@
   return result.GetJ();
 }
 
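+// Called on entry of a JIT-compiled method of a debuggable app when entry / exit hooks
+// are enabled. Reports the method entry event and deoptimizes if a method-specific
+// deoptimization (for example, a breakpoint) was requested for this method.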
+extern "C" void artMethodEntryHook(ArtMethod* method, Thread* self, ArtMethod** sp ATTRIBUTE_UNUSED)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  instr->MethodEnterEvent(self, method);
+  if (instr->IsDeoptimized(method)) {
+    // Instrumentation can request deoptimizing only a particular method (for
+    // example, when there are breakpoints on the method). In such cases deoptimize
+    // only this method. FullFrame deoptimizations are handled on method exit.
+    artDeoptimizeFromCompiledCode(DeoptimizationKind::kDebugging, self);
+  }
+}
+
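+// Called on exit of a JIT-compiled method of a debuggable app when entry / exit hooks
+// are enabled. Reports the method exit event, restores a possibly moved reference
+// return value, and deoptimizes when the caller has to continue in the interpreter.
+// Returns non-zero if a pending exception has to be delivered.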
+extern "C" int artMethodExitHook(Thread* self,
+                                 ArtMethod* method,
+                                 uint64_t* gpr_result,
+                                 uint64_t* fpr_result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
+  CHECK(gpr_result != nullptr);
+  CHECK(fpr_result != nullptr);
+  // The method exit hook must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending())
+      << "Entered method exit hook with pending exception " << self->GetException()->Dump();
+
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  bool is_ref;
+  JValue return_value = instr->GetReturnValue(self, method, &is_ref, gpr_result, fpr_result);
+  bool deoptimize = false;
+  {
+    StackHandleScope<1> hs(self);
+    MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
+    if (is_ref) {
+      // Take a handle to the return value so we won't lose it if we suspend.
+      res.Assign(return_value.GetL());
+    }
+    DCHECK(!method->IsRuntimeMethod());
+    instr->MethodExitEvent(self,
+                           method,
+                           /* frame= */ {},
+                           return_value);
+
+    // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get
+    // back to an upcall.
+    NthCallerVisitor visitor(self, 1, true);
+    visitor.WalkStack(true);
+    deoptimize = instr->ShouldDeoptimizeMethod(self, visitor);
+
+    if (is_ref) {
+      // Restore the return value if it's a reference since it might have moved.
+      *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
+    }
+  }
+
+  if (self->IsExceptionPending() || self->ObserveAsyncException()) {
+    return 1;
+  }
+
+  if (deoptimize) {
+    DeoptimizationMethodType deopt_method_type = instr->GetDeoptimizationMethodType(method);
+    self->PushDeoptimizationContext(return_value, is_ref, nullptr, false, deopt_method_type);
+    artDeoptimize(self);
+    UNREACHABLE();
+  }
+
+  return 0;
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index d2096ec..c4e62e5 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -96,6 +96,8 @@
 
 extern "C" void* art_quick_string_builder_append(uint32_t format);
 extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*);
+extern "C" void art_quick_method_entry_hook(ArtMethod*, Thread*);
+extern "C" int32_t art_quick_method_exit_hook(Thread*, ArtMethod*, uint64_t*, uint64_t*);
 
 }  // namespace art
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0853cae..609f081 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -402,9 +402,13 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(
+        QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow)
-            + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) ==
+                sizeof(QuickEntryPoints),
+            QuickEntryPoints_all);
   }
 };
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e664eb1..87db899 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -229,6 +229,34 @@
          !method->IsProxyMethod();
 }
 
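+// Returns true if `code` still needs the generic instrumentation entry / exit stubs.
+// JITed code of non-native methods in debuggable mode calls the entry / exit hooks
+// directly, so it doesn't need the stubs.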
+bool Instrumentation::CodeNeedsEntryExitStub(const void* code, ArtMethod* method) {
+  // In some tests the runtime isn't set up fully, so the entry points can be
+  // nullptr.
+  if (code == nullptr) {
+    return true;
+  }
+
+  // When JITing code for debuggable apps we generate code that calls the method
+  // entry / exit hooks when required, so there is no need to redirect JITed code
+  // to the instrumentation entry point in debuggable mode.
+  if (!Runtime::Current()->IsJavaDebuggable()) {
+    return true;
+  }
+
+  // Native methods can have JITed entry points, but we don't support calling the
+  // entry / exit hooks directly from JITed code for native methods, so we still
+  // have to install the entry / exit stubs in that case.
+  if (method->IsNative()) {
+    return true;
+  }
+
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
+    return false;
+  }
+  return true;
+}
+
 void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
   if (!method->IsInvokable() || method->IsProxyMethod()) {
     // Do not change stubs for these methods.
@@ -272,7 +300,12 @@
         if (entry_exit_stubs_installed_) {
           // This needs to be checked first since the instrumentation entrypoint will be able to
           // find the actual JIT compiled code that corresponds to this method.
-          new_quick_code = GetQuickInstrumentationEntryPoint();
+          const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
+          if (CodeNeedsEntryExitStub(code, method)) {
+            new_quick_code = GetQuickInstrumentationEntryPoint();
+          } else {
+            new_quick_code = code;
+          }
         } else if (NeedDebugVersionFor(method)) {
           // It would be great to search the JIT for its implementation here but we cannot due to
           // the locks we hold. Instead just set to the interpreter bridge and that code will search
@@ -290,22 +323,29 @@
 }
 
 // Places the instrumentation exit pc as the return PC for every quick frame. This also allows
-// deoptimization of quick frames to interpreter frames.
+// deoptimization of quick frames to interpreter frames. When deopt_all_frames is
+// true the frames have to be deoptimized. If the frame has a deoptimization
+// stack slot (all JITed frames), the slot is set to indicate this. For frames
+// that do not have this slot, the force_deopt_id on the InstrumentationStack is
+// used to check if the frame needs to be deoptimized. When deopt_all_frames is
+// false we just instrument the stack for method entry / exit hooks.
 // Since we may already have done this previously, we need to push new instrumentation frame before
 // existing instrumentation frames.
-void InstrumentationInstallStack(Thread* thread, void* arg)
+void InstrumentationInstallStack(Thread* thread, void* arg, bool deopt_all_frames)
     REQUIRES(Locks::mutator_lock_) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
   struct InstallStackVisitor final : public StackVisitor {
     InstallStackVisitor(Thread* thread_in,
                         Context* context,
                         uintptr_t instrumentation_exit_pc,
-                        uint64_t force_deopt_id)
+                        uint64_t force_deopt_id,
+                        bool deopt_all_frames)
         : StackVisitor(thread_in, context, kInstrumentationStackWalk),
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
           instrumentation_exit_pc_(instrumentation_exit_pc),
           reached_existing_instrumentation_frames_(false),
-          force_deopt_id_(force_deopt_id) {}
+          force_deopt_id_(force_deopt_id),
+          deopt_all_frames_(deopt_all_frames) {}
 
     bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) {
       ArtMethod* m = GetMethod();
@@ -355,6 +395,15 @@
           LOG(INFO) << "Ignoring already instrumented " << frame.Dump();
         }
       } else {
+        // If it is a JITed frame, just set the deopt bit if required and
+        // continue the stack walk.
+        const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+        if (method_header != nullptr && method_header->HasShouldDeoptimizeFlag()) {
+          if (deopt_all_frames_) {
+            SetShouldDeoptimizeFlag(DeoptimizeFlagValue::kDebug);
+          }
+          return true;
+        }
         CHECK_NE(return_pc, 0U);
         if (UNLIKELY(reached_existing_instrumentation_frames_ && !m->IsRuntimeMethod())) {
           // We already saw an existing instrumentation frame so this should be a runtime-method
@@ -397,6 +446,7 @@
     const uintptr_t instrumentation_exit_pc_;
     bool reached_existing_instrumentation_frames_;
     uint64_t force_deopt_id_;
+    bool deopt_all_frames_;
   };
   if (kVerboseInstrumentation) {
     std::string thread_name;
@@ -407,8 +457,11 @@
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
   std::unique_ptr<Context> context(Context::Create());
   uintptr_t instrumentation_exit_pc = reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc());
-  InstallStackVisitor visitor(
-      thread, context.get(), instrumentation_exit_pc, instrumentation->current_force_deopt_id_);
+  InstallStackVisitor visitor(thread,
+                              context.get(),
+                              instrumentation_exit_pc,
+                              instrumentation->current_force_deopt_id_,
+                              deopt_all_frames);
   visitor.WalkStack(true);
 
   if (instrumentation->ShouldNotifyMethodEnterExitEvents()) {
@@ -431,9 +484,9 @@
   thread->VerifyStack();
 }
 
-void Instrumentation::InstrumentThreadStack(Thread* thread) {
+void Instrumentation::InstrumentThreadStack(Thread* thread, bool force_deopt) {
   instrumentation_stubs_installed_ = true;
-  InstrumentationInstallStack(thread, this);
+  InstrumentationInstallStack(thread, this, force_deopt);
 }
 
 // Removes the instrumentation exit pc as the return PC for every quick frame.
@@ -529,7 +582,7 @@
   ThreadList* tl = Runtime::Current()->GetThreadList();
   tl->ForEach([&](Thread* t) {
     Locks::mutator_lock_->AssertExclusiveHeld(self);
-    InstrumentThreadStack(t);
+    InstrumentThreadStack(t, /* deopt_all_frames= */ true);
   });
   current_force_deopt_id_++;
 }
@@ -781,7 +834,9 @@
     runtime->GetClassLinker()->VisitClasses(&visitor);
     instrumentation_stubs_installed_ = true;
     MutexLock mu(self, *Locks::thread_list_lock_);
-    runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this);
+    for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
+      InstrumentThreadStack(thread, /* deopt_all_frames= */ false);
+    }
   } else {
     interpreter_stubs_installed_ = false;
     entry_exit_stubs_installed_ = false;
@@ -905,7 +960,8 @@
                  // implementation directly and this will confuse the instrumentation trampolines.
                  // TODO We should remove the need for this since it makes it impossible to profile
                  // Proxy.<init> correctly in all cases.
-                 method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init)) {
+                 method != jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Proxy_init) &&
+                 CodeNeedsEntryExitStub(quick_code, method)) {
         new_quick_code = GetQuickInstrumentationEntryPoint();
       } else {
         new_quick_code = quick_code;
@@ -998,7 +1054,12 @@
     // these previously so it will only cover the newly created frames.
     instrumentation_stubs_installed_ = true;
     MutexLock mu(self, *Locks::thread_list_lock_);
-    Runtime::Current()->GetThreadList()->ForEach(InstrumentationInstallStack, this);
+    for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
+      // This isn't a strong deopt: we deopt this method only if it is still in
+      // the deopt methods list when we reach the frame. If by then a deopt is no
+      // longer needed, it is safe to continue, so we don't mark the frame.
+      InstrumentThreadStack(thread, /* deopt_all_frames= */ false);
+    }
   }
 }
 
@@ -1424,28 +1485,8 @@
   return shorty;
 }
 
-TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
-                                                            uintptr_t* return_pc_addr,
-                                                            uint64_t* gpr_result,
-                                                            uint64_t* fpr_result) {
-  DCHECK(gpr_result != nullptr);
-  DCHECK(fpr_result != nullptr);
-  // Do the pop.
-  std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack =
-      self->GetInstrumentationStack();
-  CHECK_GT(stack->size(), 0U);
-  auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr));
-  CHECK(it != stack->end());
-  InstrumentationStackFrame instrumentation_frame = it->second;
-  stack->erase(it);
-
-  // Set return PC and check the consistency of the stack.
-  // We don't cache the return pc value in a local as it may change after
-  // sending a method exit event.
-  *return_pc_addr = instrumentation_frame.return_pc_;
-  self->VerifyStack();
-
-  ArtMethod* method = instrumentation_frame.method_;
+JValue Instrumentation::GetReturnValue(
+    Thread* self, ArtMethod* method, bool* is_ref, uint64_t* gpr_result, uint64_t* fpr_result) {
   uint32_t length;
   const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty;
@@ -1477,9 +1518,7 @@
     return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
   }
 
-  bool is_ref = return_shorty == '[' || return_shorty == 'L';
-  StackHandleScope<1> hs(self);
-  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
+  *is_ref = return_shorty == '[' || return_shorty == 'L';
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
@@ -1488,6 +1527,59 @@
   } else {
     return_value.SetJ(*gpr_result);
   }
+  return return_value;
+}
+
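+// Returns true if the frame described by `visitor` has to be deoptimized so that
+// execution continues in the interpreter: either the frame's should-deoptimize slot
+// has the kDebug bit set, or the caller itself must run in the interpreter
+// (deoptimized method, forced interpreter, obsolete declaring class, or debugger upcall).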
+bool Instrumentation::ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor) {
+  bool should_deoptimize_frame = false;
+  const OatQuickMethodHeader* header = visitor.GetCurrentOatQuickMethodHeader();
+  if (header != nullptr && header->HasShouldDeoptimizeFlag()) {
+    uint8_t should_deopt_flag = visitor.GetShouldDeoptimizeFlag();
+    // DeoptimizeFlag could be set for debugging or for CHA invalidations.
+    // Deoptimize here only if it was requested for debugging. CHA
+    // invalidations are handled in the JITed code.
+    if ((should_deopt_flag & static_cast<uint8_t>(DeoptimizeFlagValue::kDebug)) != 0) {
+      should_deoptimize_frame = true;
+    }
+  }
+  return (visitor.caller != nullptr) &&
+         (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
+          self->IsForceInterpreter() ||
+          // NB Since structurally obsolete compiled methods might have the offsets of
+          // methods/fields compiled in we need to go back to interpreter whenever we hit
+          // them.
+          visitor.caller->GetDeclaringClass()->IsObsoleteObject() ||
+          Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller) ||
+          should_deoptimize_frame);
+}
+
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
+                                                            uintptr_t* return_pc_addr,
+                                                            uint64_t* gpr_result,
+                                                            uint64_t* fpr_result) {
+  DCHECK(gpr_result != nullptr);
+  DCHECK(fpr_result != nullptr);
+  // Do the pop.
+  std::map<uintptr_t, instrumentation::InstrumentationStackFrame>* stack =
+      self->GetInstrumentationStack();
+  CHECK_GT(stack->size(), 0U);
+  auto it = stack->find(reinterpret_cast<uintptr_t>(return_pc_addr));
+  CHECK(it != stack->end());
+  InstrumentationStackFrame instrumentation_frame = it->second;
+  stack->erase(it);
+
+  // Set return PC and check the consistency of the stack.
+  // We don't cache the return pc value in a local as it may change after
+  // sending a method exit event.
+  *return_pc_addr = instrumentation_frame.return_pc_;
+  self->VerifyStack();
+
+  ArtMethod* method = instrumentation_frame.method_;
+
+  bool is_ref;
+  JValue return_value = GetReturnValue(self, method, &is_ref, gpr_result, fpr_result);
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
   if (is_ref) {
     // Take a handle to the return value so we won't lose it if we suspend.
     // FIXME: The `is_ref` is often guessed wrong, so even object alignment
@@ -1504,17 +1596,11 @@
   // back to an upcall.
   NthCallerVisitor visitor(self, 1, true);
   visitor.WalkStack(true);
-  bool deoptimize = (visitor.caller != nullptr) &&
-                    (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
-                    self->IsForceInterpreter() ||
-                    // NB Since structurally obsolete compiled methods might have the offsets of
-                    // methods/fields compiled in we need to go back to interpreter whenever we hit
-                    // them.
-                    visitor.caller->GetDeclaringClass()->IsObsoleteObject() ||
-                    // Check if we forced all threads to deoptimize in the time between this frame
-                    // being created and now.
-                    instrumentation_frame.force_deopt_id_ != current_force_deopt_id_ ||
-                    Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
+  // Check if we forced all threads to deoptimize in the time between this frame being created and
+  // now.
+  bool should_deoptimize_frame = instrumentation_frame.force_deopt_id_ != current_force_deopt_id_;
+  bool deoptimize = ShouldDeoptimizeMethod(self, visitor) || should_deoptimize_frame;
+
   if (is_ref) {
     // Restore the return value if it's a reference since it might have moved.
     *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
@@ -1532,8 +1618,8 @@
     }
     DeoptimizationMethodType deopt_method_type = GetDeoptimizationMethodType(method);
     self->PushDeoptimizationContext(return_value,
-                                    return_shorty == 'L' || return_shorty == '[',
-                                    /* exception= */ nullptr ,
+                                    is_ref,
+                                    /* exception= */ nullptr,
                                     /* from_code= */ false,
                                     deopt_method_type);
     return GetTwoWordSuccessValue(*return_pc_addr,
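
Illustrative sketch, not part of the patch: the shorty-to-register mapping that the new
GetReturnValue() helper encapsulates, read in isolation. RawValue is a simplified stand-in
for the runtime's JValue, and the float/double handling below is an assumption made for the
example rather than the runtime's exact conversion.

  #include <cstdint>

  // Stand-in for the runtime's JValue: one 64-bit slot viewed as integer or FP bits.
  union RawValue {
    int64_t j;
    double d;
  };

  RawValue ReadReturnValue(char return_shorty, uint64_t gpr_result, uint64_t fpr_result) {
    RawValue value;
    if (return_shorty == 'V') {
      value.j = 0;                                  // void: nothing meaningful to report
    } else if (return_shorty == 'F' || return_shorty == 'D') {
      value.j = static_cast<int64_t>(fpr_result);   // FP results come back in the FP register
    } else {
      value.j = static_cast<int64_t>(gpr_result);   // everything else, including references
    }
    return value;
  }
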
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 988627c..4f4bb42 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,12 +17,13 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include <functional>
 #include <stdint.h>
+
+#include <functional>
 #include <list>
 #include <memory>
-#include <unordered_set>
 #include <optional>
+#include <unordered_set>
 
 #include "arch/instruction_set.h"
 #include "base/enums.h"
@@ -30,6 +31,7 @@
 #include "base/macros.h"
 #include "base/safe_map.h"
 #include "gc_root.h"
+#include "offsets.h"
 
 namespace art {
 namespace mirror {
@@ -41,6 +43,7 @@
 class ArtMethod;
 template <typename T> class Handle;
 template <typename T> class MutableHandle;
+struct NthCallerVisitor;
 union JValue;
 class SHARED_LOCKABLE ReaderWriterMutex;
 class ShadowFrame;
@@ -203,6 +206,10 @@
 
   Instrumentation();
 
+  static constexpr MemberOffset NeedsEntryExitHooksOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Instrumentation, instrumentation_stubs_installed_));
+  }
+
   // Add a listener to be notified of the masked together set of instrumentation events. This
   // suspends the runtime to install stubs. You are expected to hold the mutator lock as a proxy
   // for saying you should have suspended all threads (installing stubs while threads are running
@@ -479,6 +486,14 @@
   void ExceptionHandledEvent(Thread* thread, ObjPtr<mirror::Throwable> exception_object) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  JValue GetReturnValue(Thread* self,
+                        ArtMethod* method,
+                        bool* is_ref,
+                        uint64_t* gpr_result,
+                        uint64_t* fpr_result) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool ShouldDeoptimizeMethod(Thread* self, const NthCallerVisitor& visitor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Called when an instrumented method is entered. The intended link register (lr) is saved so
   // that returning causes a branch to the method exit stub. Generates method enter events.
   void PushInstrumentationStackFrame(Thread* self,
@@ -524,10 +539,13 @@
                !GetDeoptimizedMethodsLock());
 
   // Install instrumentation exit stub on every method of the stack of the given thread.
-  // This is used by the debugger to cause a deoptimization of the thread's stack after updating
-  // local variable(s).
-  void InstrumentThreadStack(Thread* thread)
-      REQUIRES(Locks::mutator_lock_);
+  // This is used by:
+  //  - the debugger, to cause a deoptimization of all the frames in the thread's stack (for
+  //    example, after updating local variables)
+  //  - method tracing, to call method entry / exit hooks. For this we instrument the
+  //    stack frames to run entry / exit hooks, but we don't need to deoptimize.
+  // deopt_all_frames indicates whether the frames need to be deoptimized or not.
+  void InstrumentThreadStack(Thread* thread, bool deopt_all_frames) REQUIRES(Locks::mutator_lock_);
 
   // Force all currently running frames to be deoptimized back to interpreter. This should only be
   // used in cases where basically all compiled code has been invalidated.
@@ -551,6 +569,10 @@
   // False otherwise.
   bool RequiresInstrumentationInstallation(InstrumentationLevel new_level) const;
 
+  // Returns true if we need an entry / exit stub to call the entry / exit hooks. JITed code
+  // calls the entry / exit hooks directly and doesn't need the stub.
+  bool CodeNeedsEntryExitStub(const void* code, ArtMethod* method);
+
   // Does the job of installing or removing instrumentation code within methods.
   // In order to support multiple clients using instrumentation at the same time,
   // the caller must pass a unique key (a string) identifying it so we remind which
@@ -743,7 +765,7 @@
 
   friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
   friend class InstrumentationStackPopper;  // For popping instrumentation frames.
-  friend void InstrumentationInstallStack(Thread*, void*);
+  friend void InstrumentationInstallStack(Thread*, void*, bool);
 
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
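
Illustrative sketch, not part of the patch: conceptually, the fast path the JIT can inline once
NeedsEntryExitHooksOffset() exposes a fixed field offset amounts to "load a small flag at
(instrumentation object + offset) and call into the runtime hook only if it is non-zero". The
FakeInstrumentation type, the 16-bit flag width, and the function-pointer slow path below are
assumptions for the example, not runtime types.

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Stand-in for the runtime Instrumentation object; only the flag matters here.
  struct FakeInstrumentation {
    uint16_t needs_entry_exit_hooks;
  };

  using SlowPathFn = void (*)();   // e.g. a call into the runtime's method entry / exit hook

  void MethodEntryHookFastPath(const FakeInstrumentation* instr,
                               std::size_t offset,
                               SlowPathFn slow_path) {
    const uint8_t* base = reinterpret_cast<const uint8_t*>(instr);
    uint16_t value;
    std::memcpy(&value, base + offset, sizeof(value));  // the generated code does a plain load
    if (value != 0) {
      slow_path();                                      // rare case: hooks are enabled
    }
  }

  int main() {
    FakeInstrumentation instr{/*needs_entry_exit_hooks=*/1};
    MethodEntryHookFastPath(&instr,
                            offsetof(FakeInstrumentation, needs_entry_exit_hooks),
                            +[] { /* would call into the runtime's hook here */ });
  }
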
diff --git a/runtime/oat.h b/runtime/oat.h
index 264ad9b..51b9e2e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: JNI: Rewrite read barrier slow path.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '8', '\0' } };
+  // Last oat version changed reason: reland new entry points for method entry / exit hooks.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '9', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5f497af..ac5065b 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -599,7 +599,10 @@
               << GetDeoptimizationKindName(kind);
     DumpFramesWithType(self_, /* details= */ true);
   }
-  if (Runtime::Current()->UseJitCompilation()) {
+  // When deoptimizing for debug support, the optimized code is still valid and
+  // can be reused once debugging support (like breakpoints) is no longer
+  // needed for this method.
+  if (Runtime::Current()->UseJitCompilation() && (kind != DeoptimizationKind::kDebugging)) {
     Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
         deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader());
   } else {
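
Sketched as a standalone predicate (names illustrative, not the runtime's), the decision the
hunk above encodes is simply:

  // Debug-driven deopts keep the compiled code so it can be reused once breakpoints
  // etc. are gone; every other deopt kind still invalidates the JITed code.
  bool ShouldInvalidateCompiledCode(bool use_jit_compilation, bool is_debugging_deopt) {
    return use_jit_compilation && !is_debugging_deopt;
  }
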
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 233106e..eb0fe56 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -800,6 +800,21 @@
   return RuntimeCalleeSaveFrame::GetMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
 }
 
+uint8_t* StackVisitor::GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(GetCurrentOatQuickMethodHeader()->HasShouldDeoptimizeFlag());
+  QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+  size_t frame_size = frame_info.FrameSizeInBytes();
+  uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+  size_t core_spill_size =
+      POPCOUNT(frame_info.CoreSpillMask()) * GetBytesPerGprSpillLocation(kRuntimeISA);
+  size_t fpu_spill_size =
+      POPCOUNT(frame_info.FpSpillMask()) * GetBytesPerFprSpillLocation(kRuntimeISA);
+  size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+  uint8_t* should_deoptimize_addr = sp + offset;
+  DCHECK_EQ(*should_deoptimize_addr & ~static_cast<uint8_t>(DeoptimizeFlagValue::kAll), 0);
+  return should_deoptimize_addr;
+}
+
 template <StackVisitor::CountTransitions kCount>
 void StackVisitor::WalkStack(bool include_transitions) {
   if (check_suspended_) {
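
Illustrative sketch of the frame-layout arithmetic in GetShouldDeoptimizeFlagAddr() above: the
flag sits just below the register spill area, counting up from SP. The 4-byte flag size and the
spill masks in main() are made-up example values, not real frame data (C++20 for std::popcount).

  #include <bit>
  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  constexpr std::size_t kFlagSize = sizeof(uint32_t);   // assumed size of the deopt-flag slot

  std::size_t ShouldDeoptimizeFlagOffset(std::size_t frame_size,
                                         uint32_t core_spill_mask, std::size_t bytes_per_gpr,
                                         uint32_t fp_spill_mask, std::size_t bytes_per_fpr) {
    std::size_t core_spill_size = std::popcount(core_spill_mask) * bytes_per_gpr;
    std::size_t fpu_spill_size = std::popcount(fp_spill_mask) * bytes_per_fpr;
    // The flag lives just below the register spill area at the top of the frame.
    return frame_size - core_spill_size - fpu_spill_size - kFlagSize;
  }

  int main() {
    // Example: 112-byte frame, six 8-byte core spills (mask 0x3F), no FP spills.
    std::size_t offset = ShouldDeoptimizeFlagOffset(112, 0x3Fu, 8, 0u, 8);
    assert(offset == 112 - 48 - 0 - 4);   // the flag byte is read/written at sp + 60
    std::printf("should-deoptimize flag at sp + %zu\n", offset);
  }
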
diff --git a/runtime/stack.h b/runtime/stack.h
index 2a6fdc2..1b00b54 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -17,12 +17,14 @@
 #ifndef ART_RUNTIME_STACK_H_
 #define ART_RUNTIME_STACK_H_
 
-#include <optional>
 #include <stdint.h>
+
+#include <optional>
 #include <string>
 
 #include "base/locks.h"
 #include "base/macros.h"
+#include "deoptimization_kind.h"
 #include "obj_ptr.h"
 #include "quick/quick_method_frame_info.h"
 #include "stack_map.h"
@@ -295,6 +297,15 @@
 
   QuickMethodFrameInfo GetCurrentQuickFrameInfo() const REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void SetShouldDeoptimizeFlag(DeoptimizeFlagValue value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    uint8_t* should_deoptimize_addr = GetShouldDeoptimizeFlagAddr();
+    *should_deoptimize_addr = *should_deoptimize_addr | static_cast<uint8_t>(value);
+  }
+
+  uint8_t GetShouldDeoptimizeFlag() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return *GetShouldDeoptimizeFlagAddr();
+  }
+
  private:
   // Private constructor known in the case that num_frames_ has already been computed.
   StackVisitor(Thread* thread,
@@ -368,6 +379,8 @@
   mutable std::pair<const OatQuickMethodHeader*, CodeInfo> cur_inline_info_;
   mutable std::pair<uintptr_t, StackMap> cur_stack_map_;
 
+  uint8_t* GetShouldDeoptimizeFlagAddr() const REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   Context* const context_;
   const bool check_suspended_;
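
Illustrative sketch of the per-frame flag as a small bit set, mirroring SetShouldDeoptimizeFlag()
/ GetShouldDeoptimizeFlag() above and the kDebug-only check in ShouldDeoptimizeMethod(). The
enumerator names and values are assumptions for the example, not the runtime's DeoptimizeFlagValue
definition.

  #include <cassert>
  #include <cstdint>

  enum class FrameDeoptFlag : uint8_t {
    kDebug = 1u << 0,   // assumed: requested by the debugger / instrumentation
    kCHA   = 1u << 1,   // assumed: requested by a CHA invalidation
  };

  void SetFlag(uint8_t* slot, FrameDeoptFlag value) {
    *slot = *slot | static_cast<uint8_t>(value);   // OR the bit in, preserve the others
  }

  bool NeedsDebugDeopt(uint8_t slot) {
    // Only the debug bit forces a deopt when popping the frame; CHA invalidations
    // are handled by the compiled code itself.
    return (slot & static_cast<uint8_t>(FrameDeoptFlag::kDebug)) != 0;
  }

  int main() {
    uint8_t frame_flag = 0;
    SetFlag(&frame_flag, FrameDeoptFlag::kCHA);
    assert(!NeedsDebugDeopt(frame_flag));
    SetFlag(&frame_flag, FrameDeoptFlag::kDebug);
    assert(NeedsDebugDeopt(frame_flag));
  }
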
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ca0fe10..4b5412f 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -421,10 +421,11 @@
                                             "Sampling profiler thread");
         the_trace_->interval_us_ = interval_us;
       } else {
-        runtime->GetInstrumentation()->AddListener(the_trace_,
-                                                   instrumentation::Instrumentation::kMethodEntered |
-                                                   instrumentation::Instrumentation::kMethodExited |
-                                                   instrumentation::Instrumentation::kMethodUnwind);
+        runtime->GetInstrumentation()->AddListener(
+            the_trace_,
+            instrumentation::Instrumentation::kMethodEntered |
+                instrumentation::Instrumentation::kMethodExited |
+                instrumentation::Instrumentation::kMethodUnwind);
         // TODO: In full-PIC mode, we don't need to fully deopt.
         // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case
         // we know that inlining and other problematic optimizations are disabled. We might just
@@ -480,9 +481,10 @@
         runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
       } else {
         runtime->GetInstrumentation()->RemoveListener(
-            the_trace, instrumentation::Instrumentation::kMethodEntered |
-            instrumentation::Instrumentation::kMethodExited |
-            instrumentation::Instrumentation::kMethodUnwind);
+            the_trace,
+            instrumentation::Instrumentation::kMethodEntered |
+                instrumentation::Instrumentation::kMethodExited |
+                instrumentation::Instrumentation::kMethodUnwind);
         runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
       }
     }
diff --git a/test/2011-stack-walk-concurrent-instrument/src/Main.java b/test/2011-stack-walk-concurrent-instrument/src/Main.java
index 8f96f93..53a7eea 100644
--- a/test/2011-stack-walk-concurrent-instrument/src/Main.java
+++ b/test/2011-stack-walk-concurrent-instrument/src/Main.java
@@ -33,7 +33,7 @@
   }
 
   public native void resetTest();
-  public native void waitAndDeopt(Thread t);
+  public native void waitAndInstrumentStack(Thread t);
   public native void doSelfStackWalk();
 
   void testConcurrent() throws Exception {
@@ -41,7 +41,7 @@
     final Thread current = Thread.currentThread();
     Thread t = new Thread(() -> {
       try {
-        this.waitAndDeopt(current);
+        this.waitAndInstrumentStack(current);
       } catch (Exception e) {
         throw new Error("Fail!", e);
       }
diff --git a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
index a10fe2e..5eaaa05 100644
--- a/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
+++ b/test/2011-stack-walk-concurrent-instrument/stack_walk_concurrent.cc
@@ -76,7 +76,9 @@
   CHECK(sswv.found_g_);
   CHECK(sswv.found_h_);
 }
-extern "C" JNIEXPORT void JNICALL Java_Main_waitAndDeopt(JNIEnv*, jobject, jobject target) {
+extern "C" JNIEXPORT void JNICALL Java_Main_waitAndInstrumentStack(JNIEnv*,
+                                                                   jobject,
+                                                                   jobject target) {
   while (!instrument_waiting) {
   }
   bool timed_out = false;
@@ -85,7 +87,8 @@
   CHECK(!timed_out);
   CHECK(other != nullptr);
   ScopedSuspendAll ssa(__FUNCTION__);
-  Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other);
+  Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(other,
+                                                                  /* deopt_all_frames= */ false);
   MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
   bool updated = other->ModifySuspendCount(Thread::Current(), -1, nullptr, SuspendReason::kInternal);
   CHECK(updated);