Add method tracing JVMTI callbacks

Add MethodEntryHook and MethodExitHook callbacks and associated
capabilities.
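
A minimal agent sketch (assuming the standard jvmti.h API; the callback
names OnMethodEntry/OnMethodExit are illustrative only) of how an agent
would request the new capabilities and register the hooks:

  #include <cstring>
  #include <jvmti.h>

  static void JNICALL OnMethodEntry(jvmtiEnv* jvmti, JNIEnv* jni,
                                    jthread thread, jmethodID method) {
    // Method-entry hook body goes here.
  }

  static void JNICALL OnMethodExit(jvmtiEnv* jvmti, JNIEnv* jni,
                                   jthread thread, jmethodID method,
                                   jboolean was_popped_by_exception,
                                   jvalue return_value) {
    // Method-exit hook body goes here.
  }

  JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
    jvmtiEnv* jvmti = nullptr;
    if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_2) != JNI_OK) {
      return JNI_ERR;
    }
    // Request the method entry/exit capabilities added by this change.
    jvmtiCapabilities caps;
    std::memset(&caps, 0, sizeof(caps));
    caps.can_generate_method_entry_events = 1;
    caps.can_generate_method_exit_events = 1;
    jvmti->AddCapabilities(&caps);
    // Register and enable the entry/exit callbacks.
    jvmtiEventCallbacks callbacks;
    std::memset(&callbacks, 0, sizeof(callbacks));
    callbacks.MethodEntry = OnMethodEntry;
    callbacks.MethodExit = OnMethodExit;
    jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));
    jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_METHOD_ENTRY, nullptr);
    jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_METHOD_EXIT, nullptr);
    return JNI_OK;
  }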

Split the --jvmti-stress option in run-test into --jvmti-trace-stress
and --jvmti-redefine-stress so that each component can be tested
separately.

NB: Three differences from the RI were found:
  1) The RI will call the MethodExitHook again if the method exit hook
     itself throws an exception. This can easily cause an infinite
     loop, and the test is specifically tweaked to prevent this from
     happening on the RI (see the sketch after this list).
  2) The RI always includes the method being exited in the stack trace
     of errors thrown in the hooks. In ART we do not include the method
     if it is native. This is due to the way we call native methods and
     would be extremely difficult to change.
  3) The RI will allow exceptions thrown in the MethodEnterHook to be
     caught by the entered method in some situations. This occurs with
     the tryCatchExit test in test 989. In ART this does not happen.
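
For illustration, a sketch of an exit hook that raises an exception
(ShouldThrowHere is a hypothetical test predicate, not part of this
change) shows why behavior (1) can loop on the RI: the throw unwinds
the exiting method, which triggers the exit hook for it again.

  static void JNICALL OnMethodExit(jvmtiEnv* jvmti, JNIEnv* jni,
                                   jthread thread, jmethodID method,
                                   jboolean was_popped_by_exception,
                                   jvalue return_value) {
    if (ShouldThrowHere(jni, method)) {  // Hypothetical predicate.
      // On the RI, throwing here unwinds the exiting method and the exit
      // hook is invoked for it again, so the predicate must return false
      // on the second invocation to avoid an infinite loop.
      jclass error_class = jni->FindClass("java/lang/Error");
      jni->ThrowNew(error_class, "thrown from the method exit hook");
    }
  }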

Bug: 34414073
Test: ./test.py --host -j40
Test: ART_TEST_FULL=true DEXTER_BINARY="/path/to/dexter" \
      ./test/testrunner/testrunner.py --host -j40 -t 988
Test: ART_TEST_FULL=true DEXTER_BINARY="/path/to/dexter" \
      ./test/testrunner/testrunner.py --host -j40 -t 989
Test: lunch aosp_angler-userdebug; \
      m -j40 droid build-art && \
      fastboot -w flashall && \
      ./test.py --target -j4

Change-Id: Iab229353fae23c2ea27c2b698c831627a9f861b1
diff --git a/runtime/Android.bp b/runtime/Android.bp
index d1e124f..26e52e0 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -245,7 +245,6 @@
         "entrypoints/quick/quick_entrypoints_enum.cc",
         "entrypoints/quick/quick_field_entrypoints.cc",
         "entrypoints/quick/quick_fillarray_entrypoints.cc",
-        "entrypoints/quick/quick_instrumentation_entrypoints.cc",
         "entrypoints/quick/quick_jni_entrypoints.cc",
         "entrypoints/quick/quick_lock_entrypoints.cc",
         "entrypoints/quick/quick_math_entrypoints.cc",
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 31a7f6a..307f9f0 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1630,8 +1630,10 @@
     @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
-    mov   r3, lr         @ pass LR
-    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
+    mov   r3, sp         @ pass SP
+    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
+    cbz   r0, .Ldeliver_instrumentation_entry_exception
+                         @ Deliver exception if we got nullptr as function.
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
@@ -1647,19 +1649,13 @@
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset r1, 4
+    mov   r2, sp         @ store gpr_res pointer.
     vpush {d0}           @ save fp return value
     .cfi_adjust_cfa_offset 8
-    sub   sp, #8         @ space for return value argument. Note: AAPCS stack alignment is 8B, no
-                         @ need to align by 16.
-    .cfi_adjust_cfa_offset 8
-    vstr  d0, [sp]       @ d0 -> [sp] for fpr_res
-    mov   r2, r0         @ pass return value as gpr_res
-    mov   r3, r1
-    mov   r0, r9         @ pass Thread::Current
+    mov   r3, sp         @ store fpr_res pointer
     mov   r1, r12        @ pass SP
-    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res, fpr_res)
-    add   sp, #8
-    .cfi_adjust_cfa_offset -8
+    mov   r0, r9         @ pass Thread::Current
+    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   r2, r0         @ link register saved by instrumentation
     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
@@ -1669,9 +1665,16 @@
     .cfi_adjust_cfa_offset -8
     .cfi_restore r0
     .cfi_restore r1
-    add sp, #32          @ remove callee save frame
-    .cfi_adjust_cfa_offset -32
-    bx    r2             @ return
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   r2, .Ldo_deliver_instrumentation_exception
+                         @ Deliver exception if we got nullptr as function.
+    bx    r2             @ Otherwise, return
+.Ldeliver_instrumentation_entry_exception:
+    @ Deliver exception for art_quick_instrumentation_entry placed after
+    @ art_quick_instrumentation_exit so that the fallthrough works.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+.Ldo_deliver_instrumentation_exception:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     /*
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 18015b5..c9ead54 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2168,15 +2168,19 @@
     mov   x20, x0             // Preserve method reference in a callee-save.
 
     mov   x2, xSELF
-    mov   x3, xLR
-    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
+    mov   x3, sp  // Pass SP
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)
 
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
+    cbz   xIP0, 1f            // Deliver the pending exception if method is null.
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
+
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     .extern artInstrumentationMethodExitFromCode
@@ -2185,30 +2189,28 @@
 
     SETUP_SAVE_REFS_ONLY_FRAME
 
-    // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
-    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
-    // easier to have an extra small stack area.
-
     str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
-    str d0,  [sp, #8]         // Save floating-point result.
+    str d0, [sp, #8]          // Save floating-point result.
 
+    add   x3, sp, #8          // Pass floating-point result pointer.
+    mov   x2, sp              // Pass integer result pointer.
     add   x1, sp, #16         // Pass SP.
-    mov   x2, x0              // Pass integer result.
-    fmov  x3, d0              // Pass floating-point result.
     mov   x0, xSELF           // Pass Thread.
-    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
+    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   xIP0, x0            // Return address from instrumentation call.
     mov   xLR, x1             // r1 is holding link register if we're to bounce to deoptimize
 
     ldr   d0, [sp, #8]        // Restore floating-point result.
     ldr   x0, [sp], #16       // Restore integer result, and drop stack area.
-    .cfi_adjust_cfa_offset 16
+    .cfi_adjust_cfa_offset -16
 
-    POP_SAVE_REFS_ONLY_FRAME
-
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   xIP0, 1f            // Handle error
     br    xIP0                // Tail-call out.
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2222f5c..4fb3fe5 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1948,17 +1948,23 @@
 DEFINE_FUNCTION art_quick_instrumentation_entry
     SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
-    subl LITERAL(12), %esp        // Align stack.
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
-    CFI_ADJUST_CFA_OFFSET(4)
+    subl LITERAL(16), %esp        // Align stack (12 bytes) and reserve space for the SP argument
+    CFI_ADJUST_CFA_OFFSET(16)     // (4 bytes). We lack the scratch registers to calculate the SP
+                                  // right now, so we will just fill it in later.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    leal 32(%esp), %eax           // Put original SP into eax
+    movl %eax, 12(%esp)           // set SP
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
+
     addl LITERAL(28), %esp        // Pop arguments upto saved Method*.
     CFI_ADJUST_CFA_OFFSET(-28)
+
+    testl %eax, %eax
+    jz 1f                         // Test for null return (indicating exception) and handle it.
+
     movl 60(%esp), %edi           // Restore edi.
     movl %eax, 60(%esp)           // Place code* over edi, just under return pc.
     movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
@@ -1980,6 +1986,12 @@
     addl LITERAL(60), %esp        // Wind stack back upto code*.
     CFI_ADJUST_CFA_OFFSET(-60)
     ret                           // Call method (and pop).
+1:
+    // Make caller handle exception
+    addl LITERAL(4), %esp
+    CFI_ADJUST_CFA_OFFSET(-4)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_entry
 
 DEFINE_FUNCTION art_quick_instrumentation_exit
@@ -1992,18 +2004,19 @@
     movq %xmm0, (%esp)
     PUSH edx                      // Save gpr return value.
     PUSH eax
-    subl LITERAL(16), %esp        // Align stack
-    CFI_ADJUST_CFA_OFFSET(16)
-    movq %xmm0, (%esp)            // Pass float return value.
-    PUSH edx                      // Pass gpr return value.
-    PUSH eax
+    leal 8(%esp), %eax            // Get pointer to fpr_result
+    movl %esp, %edx               // Get pointer to gpr_result
+    PUSH eax                      // Pass fpr_result
+    PUSH edx                      // Pass gpr_result
     PUSH ecx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current.
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result, fpr_result)
+    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result*, fpr_result*)
+    testl %eax, %eax              // Check if we returned error.
+    jz 1f
     mov   %eax, %ecx              // Move returned link register.
-    addl LITERAL(32), %esp        // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-32)
+    addl LITERAL(16), %esp        // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
     movl %edx, %ebx               // Move returned link register for deopt
                                   // (ebx is pretending to be our LR).
     POP eax                       // Restore gpr return value.
@@ -2015,6 +2028,11 @@
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
+1:
+    addl LITERAL(32), %esp
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 41651d8..46d4f41 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1920,19 +1920,24 @@
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
+    movq %rsp, %rcx                     // Pass SP.
 
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
 
                                   // %rax = result of call.
-    movq %r12, %rdi               // Reload method pointer.
+    testq %rax, %rax
+    jz 1f
 
+    movq %r12, %rdi               // Reload method pointer.
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
     movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
+1:
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 #endif  // __APPLE__
 END_FUNCTION art_quick_instrumentation_entry
 
@@ -1948,15 +1953,16 @@
     movq  %rsp, %rsi                          // Pass SP.
 
     PUSH rax                  // Save integer result.
+    movq %rsp, %rdx           // Pass integer result pointer.
+
     subq LITERAL(8), %rsp     // Save floating-point result.
     CFI_ADJUST_CFA_OFFSET(8)
     movq %xmm0, (%rsp)
+    movq %rsp, %rcx           // Pass floating-point result pointer.
 
     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
-    movq  %rax, %rdx                          // Pass integer result.
-    movq  %xmm0, %rcx                         // Pass floating-point result.
 
-    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
+    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)
 
     movq  %rax, %rdi          // Store return PC
     movq  %rdx, %rsi          // Store second return PC in hidden arg.
@@ -1968,9 +1974,15 @@
 
     RESTORE_SAVE_REFS_ONLY_FRAME
 
+    testq %rdi, %rdi          // Check if we have a return PC to go to. If we do not, then there
+                              // was an exception.
+    jz 1f
+
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
     jmp   *%rdi               // Return.
+1:
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 7de8916..d591e09 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -664,7 +664,9 @@
     }
     if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
       // We are running the generic jni stub, but the method is being instrumented.
-      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      // NB: We would normally expect the PC to be zero, but we can see non-zero PCs if
+      // instrumentation is installed or removed during a call that is using the generic JNI
+      // trampoline.
       DCHECK(IsNative());
       return nullptr;
     }
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
deleted file mode 100644
index 0d2f6d0..0000000
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "base/callee_save_type.h"
-#include "base/enums.h"
-#include "callee_save_frame.h"
-#include "entrypoints/runtime_asm_entrypoints.h"
-#include "instrumentation.h"
-#include "mirror/object-inl.h"
-#include "runtime.h"
-#include "thread-current-inl.h"
-
-namespace art {
-
-extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
-                                                             mirror::Object* this_object,
-                                                             Thread* self,
-                                                             uintptr_t lr)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
-  // that part.
-  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  const void* result;
-  if (instrumentation->IsDeoptimized(method)) {
-    result = GetQuickToInterpreterBridge();
-  } else {
-    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
-    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
-  }
-  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
-  instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
-                                                 method, lr, interpreter_entry);
-  CHECK(result != nullptr) << method->PrettyMethod();
-  return result;
-}
-
-extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self, ArtMethod** sp,
-                                                              uint64_t gpr_result,
-                                                              uint64_t fpr_result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation exit stub must not be entered with a pending exception.
-  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
-                                     << self->GetException()->Dump();
-  // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
-  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
-                                                      return_pc_offset);
-  CHECK_EQ(*return_pc, 0U);
-
-  // Pop the frame filling in the return pc. The low half of the return value is 0 when
-  // deoptimization shouldn't be performed with the high-half having the return address. When
-  // deoptimization should be performed the low half is zero and the high-half the address of the
-  // deoptimization entry point.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
-      self, return_pc, gpr_result, fpr_result);
-  return return_or_deoptimize_pc;
-}
-
-}  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 6fcb711..b7cd39f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -28,6 +28,7 @@
 #include "imt_conflict_table.h"
 #include "imtable-inl.h"
 #include "interpreter/interpreter.h"
+#include "instrumentation.h"
 #include "linear_alloc.h"
 #include "method_bss_mapping.h"
 #include "method_handles.h"
@@ -895,7 +896,6 @@
     soa_->Env()->DeleteLocalRef(pair.first);
   }
 }
-
 // Handler for invocation on proxy methods. On entry a frame will exist for the proxy object method
 // which is responsible for recording callee save registers. We explicitly place into jobjects the
 // incoming reference arguments (so they survive GC). We invoke the invocation handler, which is a
@@ -988,6 +988,77 @@
   }
 }
 
+extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
+                                                             mirror::Object* this_object,
+                                                             Thread* self,
+                                                             ArtMethod** sp)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const void* result;
+  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
+  // that part.
+  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->IsDeoptimized(method)) {
+    result = GetQuickToInterpreterBridge();
+  } else {
+    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
+    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
+  }
+
+  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
+  bool is_static = method->IsStatic();
+  uint32_t shorty_len;
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
+
+  ScopedObjectAccessUnchecked soa(self);
+  RememberForGcArgumentVisitor visitor(sp, is_static, shorty, shorty_len, &soa);
+  visitor.VisitArguments();
+
+  instrumentation->PushInstrumentationStackFrame(self,
+                                                 is_static ? nullptr : this_object,
+                                                 method,
+                                                 QuickArgumentVisitor::GetCallingPc(sp),
+                                                 interpreter_entry);
+
+  visitor.FixupReferences();
+  if (UNLIKELY(self->IsExceptionPending())) {
+    return nullptr;
+  }
+  CHECK(result != nullptr) << method->PrettyMethod();
+  return result;
+}
+
+extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self,
+                                                              ArtMethod** sp,
+                                                              uint64_t* gpr_result,
+                                                              uint64_t* fpr_result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
+  CHECK(gpr_result != nullptr);
+  CHECK(fpr_result != nullptr);
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+                                     << self->GetException()->Dump();
+  // Compute address of return PC and sanity check that it currently holds 0.
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
+  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
+                                                      return_pc_offset);
+  CHECK_EQ(*return_pc, 0U);
+
+  // Pop the frame filling in the return pc. The low half of the return value is 0 when
+  // deoptimization shouldn't be performed with the high-half having the return address. When
+  // deoptimization should be performed the low half is zero and the high-half the address of the
+  // deoptimization entry point.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
+      self, return_pc, gpr_result, fpr_result);
+  if (self->IsExceptionPending()) {
+    return GetTwoWordFailureValue();
+  }
+  return return_or_deoptimize_pc;
+}
+
 // Lazily resolve a method for quick. Called by stub code.
 extern "C" const void* artQuickResolutionTrampoline(
     ArtMethod* called, mirror::Object* receiver, Thread* self, ArtMethod** sp)
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9b9c70f..8120cc4 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1123,25 +1123,40 @@
 void Instrumentation::PushInstrumentationStackFrame(Thread* self, mirror::Object* this_object,
                                                     ArtMethod* method,
                                                     uintptr_t lr, bool interpreter_entry) {
-  // We have a callee-save frame meaning this value is guaranteed to never be 0.
-  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+  DCHECK(!self->IsExceptionPending());
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   if (kVerboseInstrumentation) {
     LOG(INFO) << "Entering " << ArtMethod::PrettyMethod(method) << " from PC "
               << reinterpret_cast<void*>(lr);
   }
-  instrumentation::InstrumentationStackFrame instrumentation_frame(this_object, method, lr,
+
+  // We send the enter event before pushing the instrumentation frame to make cleanup easier. If the
+  // event causes an exception we can simply send the unwind event and return.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> h_this(hs.NewHandle(this_object));
+  if (!interpreter_entry) {
+    MethodEnterEvent(self, h_this.Get(), method, 0);
+    if (self->IsExceptionPending()) {
+      MethodUnwindEvent(self, h_this.Get(), method, 0);
+      return;
+    }
+  }
+
+  // We have a callee-save frame meaning this value is guaranteed to never be 0.
+  DCHECK(!self->IsExceptionPending());
+  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+
+  instrumentation::InstrumentationStackFrame instrumentation_frame(h_this.Get(), method, lr,
                                                                    frame_id, interpreter_entry);
   stack->push_front(instrumentation_frame);
-
-  if (!interpreter_entry) {
-    MethodEnterEvent(self, this_object, method, 0);
-  }
 }
 
-TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                                            uint64_t gpr_result,
-                                                            uint64_t fpr_result) {
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
+                                                            uintptr_t* return_pc,
+                                                            uint64_t* gpr_result,
+                                                            uint64_t* fpr_result) {
+  DCHECK(gpr_result != nullptr);
+  DCHECK(fpr_result != nullptr);
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1157,13 +1172,20 @@
   uint32_t length;
   const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
+  bool is_ref = return_shorty == '[' || return_shorty == 'L';
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
   } else if (return_shorty == 'F' || return_shorty == 'D') {
-    return_value.SetJ(fpr_result);
+    return_value.SetJ(*fpr_result);
   } else {
-    return_value.SetJ(gpr_result);
+    return_value.SetJ(*gpr_result);
+  }
+  if (is_ref) {
+    // Take a handle to the return value so we won't lose it if we suspend.
+    res.Assign(return_value.GetL());
   }
   // TODO: improve the dex pc information here, requires knowledge of current PC as opposed to
   //       return_pc.
@@ -1180,6 +1202,10 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
+  if (is_ref) {
+    // Restore the return value if it's a reference since it might have moved.
+    *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
+  }
   if (deoptimize && Runtime::Current()->IsAsyncDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << "Deoptimizing "
@@ -1214,9 +1240,8 @@
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
+  size_t idx = stack->size();
   InstrumentationStackFrame instrumentation_frame = stack->front();
-  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
-  stack->pop_front();
 
   ArtMethod* method = instrumentation_frame.method_;
   if (is_deoptimization) {
@@ -1234,6 +1259,10 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
+  CHECK_EQ(stack->size(), idx);
+  DCHECK(instrumentation_frame.method_ == stack->front().method_);
+  stack->pop_front();
   return instrumentation_frame.return_pc_;
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 363985f..90b5def 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -432,9 +432,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Called when an instrumented method is exited. Removes the pushed instrumentation frame
-  // returning the intended link register. Generates method exit events.
+  // returning the intended link register. Generates method exit events. The gpr_result and
+  // fpr_result pointers are pointers to the locations where the integer/pointer and floating point
+  // result values of the function are stored. Both pointers must always be valid but the values
+  // held there will only be meaningful if interpreted as the appropriate type given the function
+  // being returned from.
   TwoWordReturn PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                             uint64_t gpr_result, uint64_t fpr_result)
+                                             uint64_t* gpr_result, uint64_t* fpr_result)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 4bc0f2f..85cf73b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -254,6 +254,13 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         method, 0);
+      if (UNLIKELY(self->IsExceptionPending())) {
+        instrumentation->MethodUnwindEvent(self,
+                                           shadow_frame.GetThisObject(code_item->ins_size_),
+                                           method,
+                                           0);
+        return JValue();
+      }
     }
 
     if (!stay_in_interpreter) {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 32a2378..45788e7 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -26,13 +26,13 @@
 namespace art {
 namespace interpreter {
 
-#define HANDLE_PENDING_EXCEPTION()                                                              \
+#define HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instr)                                    \
   do {                                                                                          \
     DCHECK(self->IsExceptionPending());                                                         \
     self->AllowThreadSuspension();                                                              \
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
                                                                   inst->GetDexPc(insns),        \
-                                                                  instrumentation);             \
+                                                                  instr);                       \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
       DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);                        \
@@ -47,6 +47,8 @@
     }                                                                                           \
   } while (false)
 
+#define HANDLE_PENDING_EXCEPTION() HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instrumentation)
+
 #define POSSIBLY_HANDLE_PENDING_EXCEPTION(_is_exception_pending, _next_function)  \
   do {                                                                            \
     if (UNLIKELY(_is_exception_pending)) {                                        \
@@ -218,6 +220,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -235,6 +241,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -253,6 +263,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -270,6 +284,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -307,6 +325,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
           // Re-load since it might have moved during the MethodExitEvent.
           result.SetL(shadow_frame.GetVRegReference(ref_idx));
         }
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 45773fd..3ec5b32 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1731,6 +1731,7 @@
 }
 
 extern "C" bool ArtPlugin_Deinitialize() {
+  gEventHandler.Shutdown();
   PhaseUtil::Unregister();
   ThreadUtil::Unregister();
   ClassUtil::Unregister();
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 4b83b7c..af85fb0 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -218,8 +218,8 @@
     .can_redefine_any_class                          = 0,
     .can_get_current_thread_cpu_time                 = 0,
     .can_get_thread_cpu_time                         = 0,
-    .can_generate_method_entry_events                = 0,
-    .can_generate_method_exit_events                 = 0,
+    .can_generate_method_entry_events                = 1,
+    .can_generate_method_exit_events                 = 1,
     .can_generate_all_class_hook_events              = 0,
     .can_generate_compiled_method_load_events        = 0,
     .can_generate_monitor_events                     = 0,
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index 57abf31..cb7e6a9 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -20,6 +20,7 @@
 #include <array>
 
 #include "events.h"
+#include "ScopedLocalRef.h"
 
 #include "art_jvmti.h"
 
@@ -135,6 +136,8 @@
       continue;
     }
     if (ShouldDispatch<kEvent>(env, thread)) {
+      ScopedLocalRef<jthrowable> thr(jnienv, jnienv->ExceptionOccurred());
+      jnienv->ExceptionClear();
       jint new_len = 0;
       unsigned char* new_data = nullptr;
       auto callback = impl::GetCallback<kEvent>(env);
@@ -148,6 +151,9 @@
                current_class_data,
                &new_len,
                &new_data);
+      if (thr.get() != nullptr && !jnienv->ExceptionCheck()) {
+        jnienv->Throw(thr.get());
+      }
       if (new_data != nullptr && new_data != current_class_data) {
         // Destroy the data the last transformer made. We skip this if the previous state was the
         // initial one since we don't know here which jvmtiEnv allocated it.
@@ -180,6 +186,25 @@
   }
 }
 
+// Events with JNIEnvs need to stash pending exceptions since they can cause new ones to be thrown.
+// In accordance with the JVMTI specification we allow exceptions originating from events to
+// overwrite the current exception, including exceptions originating from earlier events.
+// TODO It would be nice to add the overwritten exceptions to the suppressed exceptions list of the
+// newest exception.
+template <ArtJvmtiEvent kEvent, typename ...Args>
+inline void EventHandler::DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const {
+  for (ArtJvmTiEnv* env : envs) {
+    if (env != nullptr) {
+      ScopedLocalRef<jthrowable> thr(jnienv, jnienv->ExceptionOccurred());
+      jnienv->ExceptionClear();
+      DispatchEvent<kEvent, JNIEnv*, Args...>(env, thread, jnienv, args...);
+      if (thr.get() != nullptr && !jnienv->ExceptionCheck()) {
+        jnienv->Throw(thr.get());
+      }
+    }
+  }
+}
+
 template <ArtJvmtiEvent kEvent, typename ...Args>
 inline void EventHandler::DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const {
   using FnType = void(jvmtiEnv*, Args...);
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 320c59c..90bc122 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -32,19 +32,24 @@
 #include "events-inl.h"
 
 #include "art_jvmti.h"
+#include "art_method-inl.h"
 #include "base/logging.h"
 #include "gc/allocation_listener.h"
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "handle_scope-inl.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-current-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+#include "ti_phase.h"
 
 namespace openjdkjvmti {
 
@@ -294,6 +299,222 @@
   }
 }
 
+template<typename Type>
+static Type AddLocalRef(art::JNIEnvExt* e, art::mirror::Object* obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  return (obj == nullptr) ? nullptr : e->AddLocalReference<Type>(obj);
+}
+
+class JvmtiMethodTraceListener FINAL : public art::instrumentation::InstrumentationListener {
+ public:
+  explicit JvmtiMethodTraceListener(EventHandler* handler) : event_handler_(handler) {}
+
+  template<ArtJvmtiEvent kEvent, typename ...Args>
+  void RunEventCallback(art::Thread* self, art::JNIEnvExt* jnienv, Args... args)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    ScopedLocalRef<jthread> thread_jni(jnienv, AddLocalRef<jthread>(jnienv, self->GetPeer()));
+    // Just give the event a good-sized JNI frame. 100 should be fine.
+    jnienv->PushFrame(100);
+    {
+      // Need to do trampoline! :(
+      art::ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+      event_handler_->DispatchEvent<kEvent>(self,
+                                            static_cast<JNIEnv*>(jnienv),
+                                            thread_jni.get(),
+                                            args...);
+    }
+    jnienv->PopFrame();
+  }
+
+  // Call-back for when a method is entered.
+  void MethodEntered(art::Thread* self,
+                     art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                     art::ArtMethod* method,
+                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodEntry)) {
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      RunEventCallback<ArtJvmtiEvent::kMethodEntry>(self,
+                                                    jnienv,
+                                                    art::jni::EncodeArtMethod(method));
+    }
+  }
+
+  // Callback for when a method is exited with a reference return value.
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_EQ(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      ScopedLocalRef<jobject> return_jobj(jnienv, AddLocalRef<jobject>(jnienv, return_value.Get()));
+      val.l = return_jobj.get();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Call-back for when a method is exited.
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    const art::JValue& return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_NE(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      // A 64-bit integer is the largest value in the union, so we can simply copy the result
+      // into the union.
+      val.j = return_value.GetJ();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Call-back for when a method is popped due to an exception throw. A method will either cause a
+  // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
+  void MethodUnwind(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      jvalue val;
+      // Just set this to 0xffffffffffffffff so it's not uninitialized.
+      val.j = static_cast<jlong>(-1);
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      art::StackHandleScope<1> hs(self);
+      art::Handle<art::mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
+      CHECK(!old_exception.IsNull());
+      self->ClearException();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_TRUE),
+          val);
+      // Match RI behavior of just throwing away original exception if a new one is thrown.
+      if (LIKELY(!self->IsExceptionPending())) {
+        self->SetException(old_exception.Get());
+      }
+    }
+  }
+
+  // Call-back for when the dex pc moves in a method. We don't currently have any events associated
+  // with this.
+  void DexPcMoved(art::Thread* self ATTRIBUTE_UNUSED,
+                  art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                  art::ArtMethod* method ATTRIBUTE_UNUSED,
+                  uint32_t new_dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we read from a field.
+  void FieldRead(art::Thread* self ATTRIBUTE_UNUSED,
+                 art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                 art::ArtMethod* method ATTRIBUTE_UNUSED,
+                 uint32_t dex_pc ATTRIBUTE_UNUSED,
+                 art::ArtField* field ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we write into a field.
+  void FieldWritten(art::Thread* self ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::ArtField* field ATTRIBUTE_UNUSED,
+                    const art::JValue& field_value ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back when an exception is caught.
+  void ExceptionCaught(art::Thread* self ATTRIBUTE_UNUSED,
+                       art::Handle<art::mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we execute a branch.
+  void Branch(art::Thread* self ATTRIBUTE_UNUSED,
+              art::ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we get an invokevirtual or an invokeinterface.
+  void InvokeVirtualOrInterface(art::Thread* self ATTRIBUTE_UNUSED,
+                                art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                                art::ArtMethod* caller ATTRIBUTE_UNUSED,
+                                uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                art::ArtMethod* callee ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+ private:
+  EventHandler* const event_handler_;
+};
+
+static uint32_t GetInstrumentationEventsFor(ArtJvmtiEvent event) {
+  switch (event) {
+    case ArtJvmtiEvent::kMethodEntry:
+      return art::instrumentation::Instrumentation::kMethodEntered;
+    case ArtJvmtiEvent::kMethodExit:
+      return art::instrumentation::Instrumentation::kMethodExited |
+             art::instrumentation::Instrumentation::kMethodUnwind;
+    default:
+      LOG(FATAL) << "Unknown event ";
+      return 0;
+  }
+}
+
+static void SetupMethodTraceListener(JvmtiMethodTraceListener* listener,
+                                     ArtJvmtiEvent event,
+                                     bool enable) {
+  uint32_t new_events = GetInstrumentationEventsFor(event);
+  art::instrumentation::Instrumentation* instr = art::Runtime::Current()->GetInstrumentation();
+  art::gc::ScopedGCCriticalSection gcs(art::Thread::Current(),
+                                       art::gc::kGcCauseInstrumentation,
+                                       art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll ssa("jvmti method tracing installation");
+  if (enable) {
+    if (!instr->AreAllMethodsDeoptimized()) {
+      instr->EnableMethodTracing("jvmti-tracing", /*needs_interpreter*/true);
+    }
+    instr->AddListener(listener, new_events);
+  } else {
+    instr->RemoveListener(listener, new_events);
+  }
+}
+
 // Handle special work for the given event type, if necessary.
 void EventHandler::HandleEventType(ArtJvmtiEvent event, bool enable) {
   switch (event) {
@@ -306,6 +527,11 @@
       SetupGcPauseTracking(gc_pause_listener_.get(), event, enable);
       return;
 
+    case ArtJvmtiEvent::kMethodEntry:
+    case ArtJvmtiEvent::kMethodExit:
+      SetupMethodTraceListener(method_trace_listener_.get(), event, enable);
+      return;
+
     default:
       break;
   }
@@ -419,9 +645,21 @@
   return ERR(NONE);
 }
 
+void EventHandler::Shutdown() {
+  // Need to remove the method_trace_listener_ if it's there.
+  art::Thread* self = art::Thread::Current();
+  art::gc::ScopedGCCriticalSection gcs(self,
+                                       art::gc::kGcCauseInstrumentation,
+                                       art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll ssa("jvmti method tracing uninstallation");
+  // Just remove every possible event.
+  art::Runtime::Current()->GetInstrumentation()->RemoveListener(method_trace_listener_.get(), ~0);
+}
+
 EventHandler::EventHandler() {
   alloc_listener_.reset(new JvmtiAllocationListener(this));
   gc_pause_listener_.reset(new JvmtiGcPauseListener(this));
+  method_trace_listener_.reset(new JvmtiMethodTraceListener(this));
 }
 
 EventHandler::~EventHandler() {
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
index b9e3cf0..5f37dcf 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/runtime/openjdkjvmti/events.h
@@ -29,6 +29,7 @@
 struct ArtJvmTiEnv;
 class JvmtiAllocationListener;
 class JvmtiGcPauseListener;
+class JvmtiMethodTraceListener;
 
 // an enum for ArtEvents. This differs from the JVMTI events only in that we distinguish between
 // retransformation capable and incapable loading
@@ -137,6 +138,9 @@
   EventHandler();
   ~EventHandler();
 
+  // Do cleanup for the event handler.
+  void Shutdown();
+
   // Register an env. It is assumed that this happens on env creation, that is, no events are
   // enabled, yet.
   void RegisterArtJvmTiEnv(ArtJvmTiEnv* env);
@@ -160,6 +164,12 @@
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline void DispatchEvent(art::Thread* thread, Args... args) const;
+  // Dispatch event to all registered environments stashing exceptions as needed. This works since
+  // JNIEnv* is always the second argument if it is passed to an event. Needed since C++ does not
+  // allow partial template function specialization.
+  template <ArtJvmtiEvent kEvent, typename ...Args>
+  ALWAYS_INLINE
+  void DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const;
   // Dispatch event to the given environment, only.
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
@@ -211,6 +221,7 @@
 
   std::unique_ptr<JvmtiAllocationListener> alloc_listener_;
   std::unique_ptr<JvmtiGcPauseListener> gc_pause_listener_;
+  std::unique_ptr<JvmtiMethodTraceListener> method_trace_listener_;
 };
 
 }  // namespace openjdkjvmti