24 files changed, 571 insertions, 85 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7dcf28952d..fba4da63cc 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3497,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct
   }
 }
 
+void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register numerator = locations->InAt(0).AsRegister<Register>();
+
+  int32_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+
+  Register tmp = locations->GetTemp(0).AsRegister<Register>();
+  NearLabel done;
+  __ movl(out, numerator);
+  __ andl(out, Immediate(abs_imm - 1));
+  __ j(Condition::kZero, &done);
+  __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
+  __ testl(numerator, numerator);
+  __ cmovl(Condition::kLess, out, tmp);
+  __ Bind(&done);
+}
 
 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
@@ -3610,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
     // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
-    DivByPowerOfTwo(instruction->AsDiv());
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+    if (is_div) {
+      DivByPowerOfTwo(instruction->AsDiv());
+    } else {
+      RemByPowerOfTwo(instruction->AsRem());
+    }
   } else {
     DCHECK(imm <= -2 || imm >= 2);
     GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1e49403402..deeef888e2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -218,6 +218,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
+  void RemByPowerOfTwo(HRem* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
   void HandleCondition(HCondition* condition);
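For orientation, the sequence RemByPowerOfTwo emits implements the standard branch-light identity for Java's signed remainder by a power of two: the low bits of the dividend give the magnitude, and a correction is applied only when the dividend is negative and those low bits are nonzero (Java's % takes the sign of the dividend). A minimal C++ sketch of what the movl/andl/leal/testl/cmov sequence computes — rem_pow2 is an illustrative name, not part of the patch:

#include <cassert>
#include <cstdint>

// Sketch: signed remainder by a power of two, mirroring the emitted
// movl/andl/leal/testl/cmov sequence. abs_imm must be a power of two.
int32_t rem_pow2(int32_t numerator, uint32_t abs_imm) {
  int32_t out = numerator & static_cast<int32_t>(abs_imm - 1);       // andl: low bits
  int32_t tmp = out + static_cast<int32_t>(~(abs_imm - 1));          // leal: out - abs_imm
  // cmov: if the dividend is negative and the low bits are nonzero,
  // take the negative remainder instead.
  return (numerator < 0 && out != 0) ? tmp : out;
}

int main() {
  assert(rem_pow2(7, 4) == 3);    // 7 % 4 == 3
  assert(rem_pow2(-7, 4) == -3);  // -7 % 4 == -3 (sign of the dividend)
  assert(rem_pow2(-8, 4) == 0);   // exact multiple: the jz skips the fixup
}

The x86-64 version below reuses this pattern for 32-bit values; its 64-bit branch instead ORs the shifted sign bits back in (sarq/shlq/orq), which avoids the cmov and copes with masks too wide for an immediate (hence Load64BitValue).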
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d8253907fc..14cff05f58 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3560,7 +3560,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
     LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
   }
 }
 
+void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint64_t abs_imm = AbsOrMin(imm);
+  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  if (instruction->GetResultType() == DataType::Type::kInt32) {
+    NearLabel done;
+    __ movl(out, numerator);
+    __ andl(out, Immediate(abs_imm - 1));
+    __ j(Condition::kZero, &done);
+    __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
+    __ testl(numerator, numerator);
+    __ cmov(Condition::kLess, out, tmp, false);
+    __ Bind(&done);
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+    codegen_->Load64BitValue(tmp, abs_imm - 1);
+    NearLabel done;
+    __ movq(out, numerator);
+    __ andq(out, tmp);
+    __ j(Condition::kZero, &done);
+    __ movq(tmp, numerator);
+    __ sarq(tmp, Immediate(63));
+    __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
+    __ orq(out, tmp);
+    __ Bind(&done);
+  }
+}
 
 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location second = locations->InAt(1);
@@ -3737,8 +3770,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
-    DivByPowerOfTwo(instruction->AsDiv());
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+    if (is_div) {
+      DivByPowerOfTwo(instruction->AsDiv());
+    } else {
+      RemByPowerOfTwo(instruction->AsRem());
+    }
   } else {
     DCHECK(imm <= -2 || imm >= 2);
     GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 72c4fd499d..f74e130702 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -215,6 +215,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
   void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
+  void RemByPowerOfTwo(HRem* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleCondition(HCondition* condition);
diff --git a/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
index 48df53a143..a96436e95a 100644
--- a/openjdkjvmti/events.cc
+++ b/openjdkjvmti/events.cc
@@ -32,6 +32,7 @@
 #include "events-inl.h"
 
 #include <array>
+#include <sys/time.h>
 
 #include "art_field-inl.h"
 #include "art_jvmti.h"
@@ -56,6 +57,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "ti_phase.h"
+#include "well_known_classes.h"
 
 namespace openjdkjvmti {
 
@@ -410,14 +412,103 @@ class JvmtiMonitorListener : public art::MonitorCallback {
   EventHandler* handler_;
 };
 
-static void SetupMonitorListener(art::MonitorCallback* listener, bool enable) {
+class JvmtiParkListener : public art::ParkCallback {
+ public:
+  explicit JvmtiParkListener(EventHandler* handler) : handler_(handler) {}
+
+  void ThreadParkStart(bool is_absolute, int64_t timeout)
+      override REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMonitorWait)) {
+      art::Thread* self = art::Thread::Current();
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      art::ArtField* parkBlockerField = art::jni::DecodeArtField(
+          art::WellKnownClasses::java_lang_Thread_parkBlocker);
+      art::ObjPtr<art::mirror::Object> blocker_obj = parkBlockerField->GetObj(self->GetPeer());
+      if (blocker_obj.IsNull()) {
+        blocker_obj = self->GetPeer();
+      }
+      int64_t timeout_ms;
+      if (!is_absolute) {
+        if (timeout == 0) {
+          timeout_ms = 0;
+        } else {
+          timeout_ms = timeout / 1000000;
+          if (timeout_ms == 0) {
+            // If we were instructed to park for a nonzero number of nanoseconds, but not enough
+            // to be a full millisecond, round up to 1 ms. A nonzero park() call will return
+            // soon, but a 0 wait or park call will wait indefinitely.
+            timeout_ms = 1;
+          }
+        }
+      } else {
+        struct timeval tv;
+        gettimeofday(&tv, (struct timezone *) nullptr);
+        int64_t now = tv.tv_sec * 1000LL + tv.tv_usec / 1000;
+        if (now < timeout) {
+          timeout_ms = timeout - now;
+        } else {
+          // Waiting for 0 ms is an indefinite wait; parking until a time in
+          // the past or the current time will return immediately, so emulate
+          // the shortest possible wait event.
+          timeout_ms = 1;
+        }
+      }
+      ScopedLocalRef<jobject> blocker(jnienv, AddLocalRef<jobject>(jnienv, blocker_obj.Ptr()));
+      RunEventCallback<ArtJvmtiEvent::kMonitorWait>(
+          handler_,
+          self,
+          jnienv,
+          blocker.get(),
+          static_cast<jlong>(timeout_ms));
+    }
+  }
+
+
+  // Our interpretation of the spec is that the JVMTI_EVENT_MONITOR_WAITED will be sent immediately
+  // after a thread has woken up from a sleep caused by a call to Object#wait. If the thread will
+  // never go to sleep (due to not having the lock, having bad arguments, or having an exception
+  // propagated from JVMTI_EVENT_MONITOR_WAIT) we will not send this event.
+  //
+  // This does not fully match the RI semantics. Specifically, we will not send the
+  // JVMTI_EVENT_MONITOR_WAITED event in one situation where the RI would, there was an exception in
+  // the JVMTI_EVENT_MONITOR_WAIT event but otherwise the call was fine. In that case the RI would
+  // send this event and return without going to sleep.
+  //
+  // See b/65558434 for more discussion.
+  void ThreadParkFinished(bool timeout) override REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMonitorWaited)) {
+      art::Thread* self = art::Thread::Current();
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      art::ArtField* parkBlockerField = art::jni::DecodeArtField(
+          art::WellKnownClasses::java_lang_Thread_parkBlocker);
+      art::ObjPtr<art::mirror::Object> blocker_obj = parkBlockerField->GetObj(self->GetPeer());
+      if (blocker_obj.IsNull()) {
+        blocker_obj = self->GetPeer();
+      }
+      ScopedLocalRef<jobject> blocker(jnienv, AddLocalRef<jobject>(jnienv, blocker_obj.Ptr()));
+      RunEventCallback<ArtJvmtiEvent::kMonitorWaited>(
+          handler_,
+          self,
+          jnienv,
+          blocker.get(),
+          static_cast<jboolean>(timeout));
+    }
+  }
+
+ private:
+  EventHandler* handler_;
+};
+
+static void SetupMonitorListener(art::MonitorCallback* monitor_listener, art::ParkCallback* park_listener, bool enable) {
   // We must not hold the mutator lock here, but if we're in FastJNI, for example, we might. For
   // now, do a workaround: (possibly) acquire and release.
   art::ScopedObjectAccess soa(art::Thread::Current());
   if (enable) {
-    art::Runtime::Current()->GetRuntimeCallbacks()->AddMonitorCallback(listener);
+    art::Runtime::Current()->GetRuntimeCallbacks()->AddMonitorCallback(monitor_listener);
+    art::Runtime::Current()->GetRuntimeCallbacks()->AddParkCallback(park_listener);
   } else {
-    art::Runtime::Current()->GetRuntimeCallbacks()->RemoveMonitorCallback(listener);
+    art::Runtime::Current()->GetRuntimeCallbacks()->RemoveMonitorCallback(monitor_listener);
+    art::Runtime::Current()->GetRuntimeCallbacks()->RemoveParkCallback(park_listener);
   }
 }
@@ -1053,7 +1144,7 @@ void EventHandler::HandleEventType(ArtJvmtiEvent event, bool enable) {
     case ArtJvmtiEvent::kMonitorWait:
     case ArtJvmtiEvent::kMonitorWaited:
       if (!OtherMonitorEventsEnabledAnywhere(event)) {
-        SetupMonitorListener(monitor_listener_.get(), enable);
+        SetupMonitorListener(monitor_listener_.get(), park_listener_.get(), enable);
       }
       return;
     default:
@@ -1204,6 +1295,7 @@ EventHandler::EventHandler()
   gc_pause_listener_.reset(new JvmtiGcPauseListener(this));
   method_trace_listener_.reset(new JvmtiMethodTraceListener(this));
   monitor_listener_.reset(new JvmtiMonitorListener(this));
+  park_listener_.reset(new JvmtiParkListener(this));
 }
 
 EventHandler::~EventHandler() {
diff --git a/openjdkjvmti/events.h b/openjdkjvmti/events.h
index 9f91a08b8b..abb15cc329 100644
--- a/openjdkjvmti/events.h
+++ b/openjdkjvmti/events.h
@@ -35,6 +35,7 @@ class JvmtiDdmChunkListener;
 class JvmtiGcPauseListener;
 class JvmtiMethodTraceListener;
 class JvmtiMonitorListener;
+class JvmtiParkListener;
 
 // an enum for ArtEvents. This differs from the JVMTI events only in that we distinguish between
 // retransformation capable and incapable loading
@@ -331,6 +332,7 @@ class EventHandler {
   std::unique_ptr<JvmtiGcPauseListener> gc_pause_listener_;
   std::unique_ptr<JvmtiMethodTraceListener> method_trace_listener_;
   std::unique_ptr<JvmtiMonitorListener> monitor_listener_;
+  std::unique_ptr<JvmtiParkListener> park_listener_;
 
   // True if frame pop has ever been enabled. Since we store pointers to stack frames we need to
   // continue to listen to this event even if it has been disabled.
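The nanoseconds-to-milliseconds conversion in ThreadParkStart is easy to get wrong because a timeout of 0 means "wait indefinitely" in the JVMTI monitor-wait event. A standalone sketch of the same rounding rules for the relative-park case (the function name is illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Sketch of the relative-park case above: convert a park timeout in
// nanoseconds to the milliseconds reported in the JVMTI event, where 0 must
// keep meaning "indefinite" and any nonzero wait must stay nonzero.
int64_t ParkNanosToEventMillis(int64_t timeout_ns) {
  if (timeout_ns == 0) {
    return 0;  // Untimed park: report an indefinite wait.
  }
  int64_t timeout_ms = timeout_ns / 1000000;
  // Round sub-millisecond parks up to 1 ms so they are not reported as
  // indefinite waits.
  return timeout_ms == 0 ? 1 : timeout_ms;
}

int main() {
  assert(ParkNanosToEventMillis(0) == 0);        // park() with no timeout
  assert(ParkNanosToEventMillis(10) == 1);       // parkNanos(blocker, 10)
  assert(ParkNanosToEventMillis(5000000) == 5);  // 5 ms
}

This is exactly the behavior exercised by test 1931 below, where parkNanos(blocker, 10) is expected to report "timeout: 1".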
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index e775fe4505..5daead9901 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -91,6 +91,15 @@ inline void BaseMutex::RegisterAsLocked(Thread* self) {
     CheckUnattachedThread(level_);
     return;
   }
+  LockLevel level = level_;
+  // It would be nice to avoid this condition checking in the non-debug case,
+  // but that would make the various methods that check if a mutex is held not
+  // work properly for thread wait locks. Since the vast majority of lock
+  // acquisitions are not thread wait locks, this check should not be too
+  // expensive.
+  if (UNLIKELY(level == kThreadWaitLock) && self->GetHeldMutex(kThreadWaitLock) != nullptr) {
+    level = kThreadWaitWakeLock;
+  }
   if (kDebugLocking) {
     // Check if a bad Mutex of this level or lower is held.
     bool bad_mutexes_held = false;
@@ -98,13 +107,13 @@ inline void BaseMutex::RegisterAsLocked(Thread* self) {
     // mutator_lock_ exclusive. This is because we suspending when holding locks at this level is
     // not allowed and if we hold the mutator_lock_ exclusive we must unsuspend stuff eventually
     // so there are no deadlocks.
-    if (level_ == kTopLockLevel &&
+    if (level == kTopLockLevel &&
         Locks::mutator_lock_->IsSharedHeld(self) &&
         !Locks::mutator_lock_->IsExclusiveHeld(self)) {
       LOG(ERROR) << "Lock level violation: holding \"" << Locks::mutator_lock_->name_ << "\" "
                  << "(level " << kMutatorLock << " - " << static_cast<int>(kMutatorLock)
                  << ") non-exclusive while locking \"" << name_ << "\" "
-                 << "(level " << level_ << " - " << static_cast<int>(level_) << ") a top level"
+                 << "(level " << level << " - " << static_cast<int>(level) << ") a top level"
                  << "mutex. This is not allowed.";
       bad_mutexes_held = true;
     } else if (this == Locks::mutator_lock_ && self->GetHeldMutex(kTopLockLevel) != nullptr) {
@@ -113,10 +122,10 @@ inline void BaseMutex::RegisterAsLocked(Thread* self) {
                  << "not allowed.";
       bad_mutexes_held = true;
     }
-    for (int i = level_; i >= 0; --i) {
+    for (int i = level; i >= 0; --i) {
       LockLevel lock_level_i = static_cast<LockLevel>(i);
       BaseMutex* held_mutex = self->GetHeldMutex(lock_level_i);
-      if (level_ == kTopLockLevel &&
+      if (level == kTopLockLevel &&
           lock_level_i == kMutatorLock &&
           Locks::mutator_lock_->IsExclusiveHeld(self)) {
         // This is checked above.
@@ -125,7 +134,7 @@ inline void BaseMutex::RegisterAsLocked(Thread* self) {
         LOG(ERROR) << "Lock level violation: holding \"" << held_mutex->name_ << "\" "
                    << "(level " << lock_level_i << " - " << i
                    << ") while locking \"" << name_ << "\" "
-                   << "(level " << level_ << " - " << static_cast<int>(level_) << ")";
+                   << "(level " << level << " - " << static_cast<int>(level) << ")";
         if (lock_level_i > kAbortLock) {
           // Only abort in the check below if this is more than abort level lock.
           bad_mutexes_held = true;
@@ -138,8 +147,8 @@ inline void BaseMutex::RegisterAsLocked(Thread* self) {
   }
   // Don't record monitors as they are outside the scope of analysis. They may be inspected off of
   // the monitor list.
-  if (level_ != kMonitorLock) {
-    self->SetHeldMutex(level_, this);
+  if (level != kMonitorLock) {
+    self->SetHeldMutex(level, this);
   }
 }
 
@@ -149,10 +158,17 @@ inline void BaseMutex::RegisterAsUnlocked(Thread* self) {
     return;
   }
   if (level_ != kMonitorLock) {
+    auto level = level_;
+    if (UNLIKELY(level == kThreadWaitLock) && self->GetHeldMutex(kThreadWaitWakeLock) == this) {
+      level = kThreadWaitWakeLock;
+    }
     if (kDebugLocking && gAborting == 0) {  // Avoid recursive aborts.
-      CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
+      if (level == kThreadWaitWakeLock) {
+        CHECK(self->GetHeldMutex(kThreadWaitLock) != nullptr) << "Held " << kThreadWaitWakeLock << " without " << kThreadWaitLock;
+      }
+      CHECK(self->GetHeldMutex(level) == this) << "Unlocking on unacquired mutex: " << name_;
     }
-    self->SetHeldMutex(level_, nullptr);
+    self->SetHeldMutex(level, nullptr);
   }
 }
 
@@ -214,7 +230,11 @@ inline bool Mutex::IsExclusiveHeld(const Thread* self) const {
   if (kDebugLocking) {
     // Sanity debug check that if we think it is locked we have it in our held mutexes.
     if (result && self != nullptr && level_ != kMonitorLock && !gAborting) {
-      CHECK_EQ(self->GetHeldMutex(level_), this);
+      if (level_ == kThreadWaitLock && self->GetHeldMutex(kThreadWaitLock) != this) {
+        CHECK_EQ(self->GetHeldMutex(kThreadWaitWakeLock), this);
+      } else {
+        CHECK_EQ(self->GetHeldMutex(level_), this);
+      }
     }
   }
   return result;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7711be9c90..0c8fe58252 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -68,6 +68,14 @@ enum LockLevel : uint8_t {
   // A generic lock level for mutexs that should not allow any additional mutexes to be gained after
   // acquiring it.
   kGenericBottomLock,
+  // Tracks the second acquisition at the same lock level for kThreadWaitLock. This is an exception
+  // to the normal lock ordering, used to implement Monitor::Wait - while holding one kThreadWait
+  // level lock, it is permitted to acquire a second one - with internal safeguards to ensure that
+  // the second lock acquisition does not result in deadlock. This is implemented in the lock
+  // order by treating the second acquisition of a kThreadWaitLock as a kThreadWaitWakeLock
+  // acquisition. Thus, acquiring kThreadWaitWakeLock requires holding kThreadWaitLock.
+  kThreadWaitWakeLock,
+  kThreadWaitLock,
   kJdwpAdbStateLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
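The two new levels encode a narrow exception to the usual ordering: a thread that already holds one kThreadWaitLock mutex may take exactly one more, which is then tracked at the lower kThreadWaitWakeLock level. A minimal sketch of that remapping rule in isolation (the helper and table are illustrative, not ART types):

#include <cassert>
#include <cstdint>

// Simplified lock levels, ordered bottom (0) upward as in mutex.h.
enum LockLevel : uint8_t { kThreadWaitWakeLock = 0, kThreadWaitLock = 1, kNumLevels };

// Sketch: per-thread table of held locks, one slot per level.
struct FakeThread {
  const void* held[kNumLevels] = {nullptr, nullptr};
};

// The remapping RegisterAsLocked performs: a second kThreadWaitLock
// acquisition is recorded at kThreadWaitWakeLock instead, so the usual
// "no lock at the same or higher level" check still passes.
LockLevel EffectiveLevel(const FakeThread& t, LockLevel declared) {
  if (declared == kThreadWaitLock && t.held[kThreadWaitLock] != nullptr) {
    return kThreadWaitWakeLock;
  }
  return declared;
}

int main() {
  FakeThread t;
  int mutex_a;
  assert(EffectiveLevel(t, kThreadWaitLock) == kThreadWaitLock);  // First wait mutex.
  t.held[kThreadWaitLock] = &mutex_a;
  // Second wait mutex is tracked one level lower, preserving the ordering invariant.
  assert(EffectiveLevel(t, kThreadWaitLock) == kThreadWaitWakeLock);
}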
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 26a8d1310b..aba1c5ad94 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -93,6 +93,8 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap,
       from_space_num_bytes_at_first_pause_(0),
       mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
+      copied_live_bytes_ratio_sum_(0.f),
+      gc_count_(0),
       young_gen_(young_gen),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
@@ -2038,6 +2040,11 @@ void ConcurrentCopying::ReclaimPhase() {
     }
     CHECK_LE(to_objects, from_objects);
     CHECK_LE(to_bytes, from_bytes);
+    if (from_bytes > 0) {
+      copied_live_bytes_ratio_sum_ += static_cast<float>(to_bytes) / from_bytes;
+      gc_count_++;
+    }
+
     // Cleared bytes and objects, populated by the call to RegionSpace::ClearFromSpace below.
     uint64_t cleared_bytes;
     uint64_t cleared_objects;
@@ -3204,6 +3211,15 @@ void ConcurrentCopying::DumpPerformanceInfo(std::ostream& os) {
   if (rb_slow_path_count_gc_total_ > 0) {
     os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
   }
+  float average_ratio = copied_live_bytes_ratio_sum_ / gc_count_;
+
+  if (young_gen_) {
+    os << "Average minor GC copied live bytes ratio "
+       << average_ratio << " over " << gc_count_ << " minor GCs\n";
+  } else {
+    os << "Average major GC copied live bytes ratio "
+       << average_ratio << " over " << gc_count_ << " major GCs\n";
+  }
   os << "Cumulative bytes moved "
      << cumulative_bytes_moved_.load(std::memory_order_relaxed) << "\n";
   os << "Cumulative objects moved "
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 1a7464a05f..cd086c4fb8 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -359,6 +359,18 @@ class ConcurrentCopying : public GarbageCollector {
   Atomic<uint64_t> cumulative_bytes_moved_;
   Atomic<uint64_t> cumulative_objects_moved_;
 
+  // copied_live_bytes_ratio_sum_ and gc_count_ are read and written by CC per
+  // GC, in ReclaimPhase, and are read by DumpPerformanceInfo (potentially from
+  // another thread). However, at present, DumpPerformanceInfo is only called
+  // when the runtime shuts down, so no concurrent access.
+  // The sum of all copied live bytes ratios (to_bytes/from_bytes)
+  float copied_live_bytes_ratio_sum_;
+  // The number of GCs counted, used to calculate the average above. (It doesn't
+  // include GCs where from_bytes is zero, IOW, from-space is empty, which is
+  // possible for minor GC if all allocated objects are in non-moving
+  // space.)
+  size_t gc_count_;
+
   // Generational "sticky", only trace through dirty objects in region space.
   const bool young_gen_;
 
   // If true, the GC thread is done scanning marked objects on dirty and aged
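As a concrete reading of the new statistic: if three moving collections copy 2 MB of 8 MB, 1 MB of 4 MB, and 3 MB of 6 MB of from-space live data, the per-GC ratios are 0.25, 0.25, and 0.5, so the dump reports an average of about 0.33 over 3 GCs. A minimal sketch of the bookkeeping under those assumed numbers:

#include <cstdint>
#include <cstdio>

// Sketch of the accounting added above: accumulate to_bytes/from_bytes per
// GC and report the mean at shutdown. GCs with an empty from-space are
// skipped, exactly as in ReclaimPhase.
int main() {
  float copied_live_bytes_ratio_sum = 0.f;
  size_t gc_count = 0;
  const uint64_t from_bytes[] = {8u << 20, 4u << 20, 6u << 20};
  const uint64_t to_bytes[] = {2u << 20, 1u << 20, 3u << 20};
  for (size_t i = 0; i < 3; ++i) {
    if (from_bytes[i] > 0) {
      copied_live_bytes_ratio_sum += static_cast<float>(to_bytes[i]) / from_bytes[i];
      ++gc_count;
    }
  }
  // Prints: Average copied live bytes ratio 0.333333 over 3 GCs
  std::printf("Average copied live bytes ratio %f over %zu GCs\n",
              copied_live_bytes_ratio_sum / gc_count, gc_count);
}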
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 0f0a378142..df2a8e29cb 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -97,6 +97,7 @@ Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_
       lock_count_(0),
       obj_(GcRoot<mirror::Object>(obj)),
       wait_set_(nullptr),
+      wake_set_(nullptr),
       hash_code_(hash_code),
       locking_method_(nullptr),
       locking_dex_pc_(0),
@@ -120,6 +121,7 @@ Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_
       lock_count_(0),
       obj_(GcRoot<mirror::Object>(obj)),
       wait_set_(nullptr),
+      wake_set_(nullptr),
       hash_code_(hash_code),
       locking_method_(nullptr),
       locking_dex_pc_(0),
@@ -226,7 +228,8 @@ Monitor::~Monitor() {
 }
 
 void Monitor::AppendToWaitSet(Thread* thread) {
-  DCHECK(owner_ == Thread::Current());
+  // Not checking that the owner is equal to this thread, since we've released
+  // the monitor by the time this method is called.
   DCHECK(thread != nullptr);
   DCHECK(thread->GetWaitNext() == nullptr) << thread->GetWaitNext();
   if (wait_set_ == nullptr) {
@@ -245,24 +248,29 @@ void Monitor::AppendToWaitSet(Thread* thread) {
 void Monitor::RemoveFromWaitSet(Thread *thread) {
   DCHECK(owner_ == Thread::Current());
   DCHECK(thread != nullptr);
-  if (wait_set_ == nullptr) {
-    return;
-  }
-  if (wait_set_ == thread) {
-    wait_set_ = thread->GetWaitNext();
-    thread->SetWaitNext(nullptr);
-    return;
-  }
-
-  Thread* t = wait_set_;
-  while (t->GetWaitNext() != nullptr) {
-    if (t->GetWaitNext() == thread) {
-      t->SetWaitNext(thread->GetWaitNext());
-      thread->SetWaitNext(nullptr);
-      return;
+  auto remove = [&](Thread*& set){
+    if (set != nullptr) {
+      if (set == thread) {
+        set = thread->GetWaitNext();
+        thread->SetWaitNext(nullptr);
+        return true;
+      }
+      Thread* t = set;
+      while (t->GetWaitNext() != nullptr) {
+        if (t->GetWaitNext() == thread) {
+          t->SetWaitNext(thread->GetWaitNext());
+          thread->SetWaitNext(nullptr);
+          return true;
+        }
+        t = t->GetWaitNext();
+      }
     }
-    t = t->GetWaitNext();
+    return false;
+  };
+  if (remove(wait_set_)) {
+    return;
   }
+  remove(wake_set_);
 }
 
 void Monitor::SetObject(mirror::Object* object) {
@@ -699,33 +707,81 @@ void Monitor::FailedUnlock(mirror::Object* o,
 bool Monitor::Unlock(Thread* self) {
   DCHECK(self != nullptr);
   uint32_t owner_thread_id = 0u;
-  {
-    MutexLock mu(self, monitor_lock_);
-    Thread* owner = owner_;
-    if (owner != nullptr) {
-      owner_thread_id = owner->GetThreadId();
-    }
-    if (owner == self) {
-      // We own the monitor, so nobody else can be in here.
-      AtraceMonitorUnlock();
-      if (lock_count_ == 0) {
-        owner_ = nullptr;
-        locking_method_ = nullptr;
-        locking_dex_pc_ = 0;
-        // Wake a contender.
-        monitor_contenders_.Signal(self);
-      } else {
-        --lock_count_;
-      }
+  DCHECK(!monitor_lock_.IsExclusiveHeld(self));
+  monitor_lock_.Lock(self);
+  Thread* owner = owner_;
+  if (owner != nullptr) {
+    owner_thread_id = owner->GetThreadId();
+  }
+  if (owner == self) {
+    // We own the monitor, so nobody else can be in here.
+    AtraceMonitorUnlock();
+    if (lock_count_ == 0) {
+      owner_ = nullptr;
+      locking_method_ = nullptr;
+      locking_dex_pc_ = 0;
+      SignalContendersAndReleaseMonitorLock(self);
+      return true;
+    } else {
+      --lock_count_;
+      monitor_lock_.Unlock(self);
       return true;
     }
   }
   // We don't own this, so we're not allowed to unlock it.
   // The JNI spec says that we should throw IllegalMonitorStateException in this case.
   FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
+  monitor_lock_.Unlock(self);
   return false;
 }
 
+void Monitor::SignalContendersAndReleaseMonitorLock(Thread* self) {
+  // We want to signal one thread to wake up, to acquire the monitor that
+  // we are releasing. This could either be a Thread waiting on its own
+  // ConditionVariable, or a thread waiting on monitor_contenders_.
+  while (wake_set_ != nullptr) {
+    // No risk of waking ourselves here; since monitor_lock_ is not released until we're ready to
+    // return, notify can't move the current thread from wait_set_ to wake_set_ until this
+    // method is done checking wake_set_.
+    Thread* thread = wake_set_;
+    wake_set_ = thread->GetWaitNext();
+    thread->SetWaitNext(nullptr);
+
+    // Check to see if the thread is still waiting.
+    {
+      // In the case of wait(), we'll be acquiring another thread's GetWaitMutex with
+      // self's GetWaitMutex held. This does not risk deadlock, because we only acquire this lock
+      // for threads in the wake_set_. A thread can only enter wake_set_ from Notify or NotifyAll,
+      // and those hold monitor_lock_. Thus, the threads whose wait mutexes we acquire here must
+      // have already been released from wait(), since we have not released monitor_lock_ until
+      // after we've chosen our thread to wake, so there is no risk of the following lock ordering
+      // leading to deadlock:
+      // Thread 1 waits
+      // Thread 2 waits
+      // Thread 3 moves threads 1 and 2 from wait_set_ to wake_set_
+      // Thread 1 enters this block, and attempts to acquire Thread 2's GetWaitMutex to wake it
+      // Thread 2 enters this block, and attempts to acquire Thread 1's GetWaitMutex to wake it
+      //
+      // Since monitor_lock_ is not released until the thread-to-be-woken-up's GetWaitMutex is
+      // acquired, two threads cannot attempt to acquire each other's GetWaitMutex while holding
+      // their own and cause deadlock.
+      MutexLock wait_mu(self, *thread->GetWaitMutex());
+      if (thread->GetWaitMonitor() != nullptr) {
+        // Release the lock, so that a potentially awakened thread will not
+        // immediately contend on it. The lock ordering here is:
+        // monitor_lock_, self->GetWaitMutex, thread->GetWaitMutex
+        monitor_lock_.Unlock(self);
+        thread->GetWaitConditionVariable()->Signal(self);
+        return;
+      }
+    }
+  }
+  // If we didn't wake any threads that were originally waiting on us,
+  // wake a contender.
+  monitor_contenders_.Signal(self);
+  monitor_lock_.Unlock(self);
+}
+
 void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
                    bool interruptShouldThrow, ThreadState why) {
   DCHECK(self != nullptr);
@@ -755,17 +811,9 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
   }
 
   /*
-   * Add ourselves to the set of threads waiting on this monitor, and
-   * release our hold. We need to let it go even if we're a few levels
+   * Release our hold - we need to let it go even if we're a few levels
    * deep in a recursive lock, and we need to restore that later.
-   *
-   * We append to the wait set ahead of clearing the count and owner
-   * fields so the subroutine can check that the calling thread owns
-   * the monitor. Aside from that, the order of member updates is
-   * not order sensitive as we hold the pthread mutex.
   */
-  AppendToWaitSet(self);
-  ++num_waiters_;
   int prev_lock_count = lock_count_;
   lock_count_ = 0;
   owner_ = nullptr;
@@ -790,6 +838,17 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
     // Pseudo-atomically wait on self's wait_cond_ and release the monitor lock.
     MutexLock mu(self, *self->GetWaitMutex());
 
+    /*
+     * Add ourselves to the set of threads waiting on this monitor.
+     * It's important that we are only added to the wait set after
+     * acquiring our GetWaitMutex, so that calls to Notify() that occur after we
+     * have released monitor_lock_ will not move us from wait_set_ to wake_set_
+     * until we've signalled contenders on this monitor.
+     */
+    AppendToWaitSet(self);
+    ++num_waiters_;
+
+
     // Set wait_monitor_ to the monitor object we will be waiting on. When wait_monitor_ is
     // non-null a notifying or interrupting thread must signal the thread's wait_cond_ to wake it
     // up.
@@ -797,8 +856,7 @@ void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
     self->SetWaitMonitor(this);
 
     // Release the monitor lock.
-    monitor_contenders_.Signal(self);
-    monitor_lock_.Unlock(self);
+    SignalContendersAndReleaseMonitorLock(self);
 
     // Handle the case where the thread was interrupted before we called wait().
     if (self->IsInterrupted()) {
@@ -874,18 +932,12 @@ void Monitor::Notify(Thread* self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
     return;
   }
-  // Signal the first waiting thread in the wait set.
-  while (wait_set_ != nullptr) {
-    Thread* thread = wait_set_;
-    wait_set_ = thread->GetWaitNext();
-    thread->SetWaitNext(nullptr);
-
-    // Check to see if the thread is still waiting.
-    MutexLock wait_mu(self, *thread->GetWaitMutex());
-    if (thread->GetWaitMonitor() != nullptr) {
-      thread->GetWaitConditionVariable()->Signal(self);
-      return;
-    }
+  // Move one thread from waiters to wake set
+  Thread* to_move = wait_set_;
+  if (to_move != nullptr) {
+    wait_set_ = to_move->GetWaitNext();
+    to_move->SetWaitNext(wake_set_);
+    wake_set_ = to_move;
   }
 }
 
@@ -897,12 +949,20 @@ void Monitor::NotifyAll(Thread* self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
     return;
   }
-  // Signal all threads in the wait set.
-  while (wait_set_ != nullptr) {
-    Thread* thread = wait_set_;
-    wait_set_ = thread->GetWaitNext();
-    thread->SetWaitNext(nullptr);
-    thread->Notify();
+
+  // Move all threads from waiters to wake set
+  Thread* to_move = wait_set_;
+  if (to_move != nullptr) {
+    wait_set_ = nullptr;
+    Thread* move_to = wake_set_;
+    if (move_to == nullptr) {
+      wake_set_ = to_move;
+      return;
+    }
+    while (move_to->GetWaitNext() != nullptr) {
+      move_to = move_to->GetWaitNext();
+    }
+    move_to->SetWaitNext(to_move);
   }
 }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 6b7604ec8a..c1f84e92bf 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -181,6 +181,8 @@ class Monitor {
   // this routine.
   void RemoveFromWaitSet(Thread* thread) REQUIRES(monitor_lock_);
 
+  void SignalContendersAndReleaseMonitorLock(Thread* self) RELEASE(monitor_lock_);
+
   // Changes the shape of a monitor from thin to fat, preserving the internal lock state. The
   // calling thread must own the lock or the owner must be suspended. There's a race with other
   // threads inflating the lock, installing hash codes and spurious failures. The caller should
@@ -306,6 +308,9 @@ class Monitor {
   // Threads currently waiting on this monitor.
   Thread* wait_set_ GUARDED_BY(monitor_lock_);
 
+  // Threads that were waiting on this monitor, but are now contending on it.
+  Thread* wake_set_ GUARDED_BY(monitor_lock_);
+
   // Stored object hash code, generated lazily by GetHashCode.
   AtomicInteger hash_code_;
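The core of the monitor change is that Notify() no longer signals a waiter directly while the monitor is still held; it only moves the waiter between two intrusive lists, and the actual wakeup is deferred to the moment the monitor is released. A toy model of that handoff, using std::deque instead of the intrusive GetWaitNext() links:

#include <cassert>
#include <deque>

// Toy model of the wait_set_/wake_set_ split: Notify() only transfers a
// waiter to the wake set; the wakeup itself is deferred to unlock time.
struct ToyMonitor {
  std::deque<int> wait_set;  // Threads blocked in wait().
  std::deque<int> wake_set;  // Notified threads, not yet signalled.

  void Notify() {  // Caller holds the monitor.
    if (!wait_set.empty()) {
      wake_set.push_back(wait_set.front());
      wait_set.pop_front();
    }
  }

  // On unlock: wake one previously notified thread if there is one;
  // otherwise a regular contender would be signalled.
  int SignalOnUnlock() {
    if (!wake_set.empty()) {
      int t = wake_set.front();
      wake_set.pop_front();
      return t;
    }
    return -1;  // No notified waiter; signal a contender instead.
  }
};

int main() {
  ToyMonitor m;
  m.wait_set = {1, 2};
  m.Notify();                        // Thread 1 becomes eligible to wake...
  assert(m.SignalOnUnlock() == 1);   // ...but is only signalled at unlock.
  assert(m.SignalOnUnlock() == -1);  // Thread 2 was never notified.
}

Deferring the signal until the monitor lock is about to be released is what lets a notified thread avoid immediately contending on a lock its notifier still holds, which is the situation the long comment in SignalContendersAndReleaseMonitorLock is reasoning about.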
diff --git a/runtime/runtime_callbacks.cc b/runtime/runtime_callbacks.cc
index 758917cf7e..bf74816f24 100644
--- a/runtime/runtime_callbacks.cc
+++ b/runtime/runtime_callbacks.cc
@@ -151,6 +151,26 @@ void RuntimeCallbacks::RemoveMonitorCallback(MonitorCallback* cb) {
   Remove(cb, &monitor_callbacks_);
 }
 
+void RuntimeCallbacks::ThreadParkStart(bool is_absolute, int64_t timeout) {
+  for (ParkCallback * cb : park_callbacks_) {
+    cb->ThreadParkStart(is_absolute, timeout);
+  }
+}
+
+void RuntimeCallbacks::ThreadParkFinished(bool timeout) {
+  for (ParkCallback * cb : park_callbacks_) {
+    cb->ThreadParkFinished(timeout);
+  }
+}
+
+void RuntimeCallbacks::AddParkCallback(ParkCallback* cb) {
+  park_callbacks_.push_back(cb);
+}
+
+void RuntimeCallbacks::RemoveParkCallback(ParkCallback* cb) {
+  Remove(cb, &park_callbacks_);
+}
+
 void RuntimeCallbacks::RemoveThreadLifecycleCallback(ThreadLifecycleCallback* cb) {
   Remove(cb, &thread_callbacks_);
 }
diff --git a/runtime/runtime_callbacks.h b/runtime/runtime_callbacks.h
index 9f0410d102..4cce15ed52 100644
--- a/runtime/runtime_callbacks.h
+++ b/runtime/runtime_callbacks.h
@@ -115,6 +115,19 @@ class MonitorCallback {
   virtual ~MonitorCallback() {}
 };
 
+class ParkCallback {
+ public:
+  // Called on entry to the Unsafe#park method
+  virtual void ThreadParkStart(bool is_absolute, int64_t millis_timeout)
+      REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+
+  // Called just after the thread has woken up from going to sleep for a park(). This will only be
+  // called for Unsafe.park() calls where the thread did (or at least could have) gone to sleep.
+  virtual void ThreadParkFinished(bool timed_out) REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+
+  virtual ~ParkCallback() {}
+};
+
 // A callback to let parts of the runtime note that they are currently relying on a particular
 // method remaining in it's current state. Users should not rely on always being called. If multiple
 // callbacks are added the runtime will short-circuit when the first one returns 'true'.
@@ -193,6 +206,11 @@ class RuntimeCallbacks {
   void AddMonitorCallback(MonitorCallback* cb) REQUIRES_SHARED(Locks::mutator_lock_);
   void RemoveMonitorCallback(MonitorCallback* cb) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void ThreadParkStart(bool is_absolute, int64_t timeout) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ThreadParkFinished(bool timed_out) REQUIRES_SHARED(Locks::mutator_lock_);
+  void AddParkCallback(ParkCallback* cb) REQUIRES_SHARED(Locks::mutator_lock_);
+  void RemoveParkCallback(ParkCallback* cb) REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Returns true if some MethodInspectionCallback indicates the method is being inspected/depended
   // on by some code.
   bool IsMethodBeingInspected(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -243,6 +261,8 @@ class RuntimeCallbacks {
       GUARDED_BY(Locks::mutator_lock_);
   std::vector<MonitorCallback*> monitor_callbacks_
       GUARDED_BY(Locks::mutator_lock_);
+  std::vector<ParkCallback*> park_callbacks_
+      GUARDED_BY(Locks::mutator_lock_);
   std::vector<MethodInspectionCallback*> method_inspection_callbacks_
       GUARDED_BY(Locks::mutator_lock_);
   std::vector<DdmCallback*> ddm_callbacks_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 66e852a216..e9fed76d6f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -302,8 +302,9 @@ void Thread::Park(bool is_absolute, int64_t time) {
   int old_state = tls32_.park_state_.fetch_add(1, std::memory_order_relaxed);
   if (old_state == kNoPermit) {
     // no permit was available. block thread until later.
-    // TODO: Call to signal jvmti here
+    Runtime::Current()->GetRuntimeCallbacks()->ThreadParkStart(is_absolute, time);
     int result = 0;
+    bool timed_out = false;
     if (!is_absolute && time == 0) {
       // Thread.getState() is documented to return waiting for untimed parks.
       ScopedThreadSuspension sts(this, ThreadState::kWaiting);
@@ -351,8 +352,10 @@ void Thread::Park(bool is_absolute, int64_t time) {
     }
     if (result == -1) {
       switch (errno) {
-        case EAGAIN:
         case ETIMEDOUT:
+          timed_out = true;
+          FALLTHROUGH_INTENDED;
+        case EAGAIN:
         case EINTR: break;  // park() is allowed to spuriously return
         default: PLOG(FATAL) << "Failed to park";
       }
@@ -360,6 +363,7 @@ void Thread::Park(bool is_absolute, int64_t time) {
     // Mark as no longer waiting, and consume permit if there is one.
     tls32_.park_state_.store(kNoPermit, std::memory_order_relaxed);
     // TODO: Call to signal jvmti here
+    Runtime::Current()->GetRuntimeCallbacks()->ThreadParkFinished(timed_out);
   } else {
     // the fetch_add has consumed the permit. immediately return.
     DCHECK_EQ(old_state, kPermitAvailable);
@@ -2250,7 +2254,7 @@ Thread::Thread(bool daemon)
     : tls32_(daemon),
       wait_monitor_(nullptr),
       is_runtime_thread_(false) {
-  wait_mutex_ = new Mutex("a thread wait mutex");
+  wait_mutex_ = new Mutex("a thread wait mutex", LockLevel::kThreadWaitLock);
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
   tlsPtr_.name = new std::string(kThreadNameDuringStartup);
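For context, Park() is driven by a single permit word: the fetch_add records the transition, and only the no-permit path blocks, which is why the new callbacks fire only there. A reduced model of the permit protocol (constants and atomics simplified from thread.cc; this is a sketch, not the runtime's exact state machine):

#include <atomic>
#include <cassert>

// Reduced model of the permit protocol in Thread::Park/Unpark: Unpark()
// publishes at most one permit; Park() consumes it if present and only
// blocks - and thus only fires ThreadParkStart/ThreadParkFinished - when
// no permit was available.
constexpr int kNoPermit = 0;
constexpr int kPermitAvailable = 1;

struct ToyParker {
  std::atomic<int> park_state{kNoPermit};

  void Unpark() { park_state.store(kPermitAvailable, std::memory_order_relaxed); }

  // Returns true if the caller would have blocked (the callback-firing path).
  bool Park() {
    int old_state = park_state.fetch_add(1, std::memory_order_relaxed);
    if (old_state == kNoPermit) {
      // ... futex wait would happen here; ThreadParkStart/Finished fire ...
      park_state.store(kNoPermit, std::memory_order_relaxed);
      return true;
    }
    // fetch_add consumed the permit; reset and return immediately.
    park_state.store(kNoPermit, std::memory_order_relaxed);
    return false;
  }
};

int main() {
  ToyParker p;
  p.Unpark();
  assert(!p.Park());  // Permit available: no blocking, no park events.
  assert(p.Park());   // No permit: would block and report park events.
}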
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 94faa626f6..65039bc6d4 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -122,6 +122,7 @@ jfieldID WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList;
 jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements;
 jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile;
 jfieldID WellKnownClasses::dalvik_system_VMRuntime_nonSdkApiUsageConsumer;
+jfieldID WellKnownClasses::java_lang_Thread_parkBlocker;
 jfieldID WellKnownClasses::java_lang_Thread_daemon;
 jfieldID WellKnownClasses::java_lang_Thread_group;
 jfieldID WellKnownClasses::java_lang_Thread_lock;
@@ -371,6 +372,7 @@ void WellKnownClasses::Init(JNIEnv* env) {
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
   dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;");
   dalvik_system_VMRuntime_nonSdkApiUsageConsumer = CacheField(env, dalvik_system_VMRuntime, true, "nonSdkApiUsageConsumer", "Ljava/util/function/Consumer;");
+  java_lang_Thread_parkBlocker = CacheField(env, java_lang_Thread, false, "parkBlocker", "Ljava/lang/Object;");
   java_lang_Thread_daemon = CacheField(env, java_lang_Thread, false, "daemon", "Z");
   java_lang_Thread_group = CacheField(env, java_lang_Thread, false, "group", "Ljava/lang/ThreadGroup;");
   java_lang_Thread_lock = CacheField(env, java_lang_Thread, false, "lock", "Ljava/lang/Object;");
@@ -518,6 +520,7 @@ void WellKnownClasses::Clear() {
   dalvik_system_DexPathList_dexElements = nullptr;
   dalvik_system_DexPathList__Element_dexFile = nullptr;
   dalvik_system_VMRuntime_nonSdkApiUsageConsumer = nullptr;
+  java_lang_Thread_parkBlocker = nullptr;
   java_lang_Thread_daemon = nullptr;
   java_lang_Thread_group = nullptr;
   java_lang_Thread_lock = nullptr;
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 8c85228dfc..130747c026 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -131,6 +131,7 @@ struct WellKnownClasses {
   static jfieldID dalvik_system_DexPathList_dexElements;
   static jfieldID dalvik_system_DexPathList__Element_dexFile;
   static jfieldID dalvik_system_VMRuntime_nonSdkApiUsageConsumer;
+  static jfieldID java_lang_Thread_parkBlocker;
   static jfieldID java_lang_Thread_daemon;
   static jfieldID java_lang_Thread_group;
   static jfieldID java_lang_Thread_lock;
diff --git a/test/1931-monitor-events/check b/test/1931-monitor-events/check
new file mode 100644
index 0000000000..8a7f844283
--- /dev/null
+++ b/test/1931-monitor-events/check
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Art sends events for park/unpark, and the RI doesn't. Remove them from the expected output.
+if [[ "$TEST_RUNTIME" == "jvm" ]]; then
+  patch -p0 expected.txt < jvm-expected.patch >/dev/null
+fi
+
+./default-check "$@"
diff --git a/test/1931-monitor-events/expected.txt b/test/1931-monitor-events/expected.txt
index 33a9bd3684..f368ae20be 100644
--- a/test/1931-monitor-events/expected.txt
+++ b/test/1931-monitor-events/expected.txt
@@ -1,6 +1,9 @@
 Testing contended locking.
 Locker thread 1 for NamedLock[Lock testLock] contended-LOCKING NamedLock[Lock testLock]
 Locker thread 1 for NamedLock[Lock testLock] LOCKED NamedLock[Lock testLock]
+Testing park.
+ParkThread start-monitor-wait NamedLock[Parking blocker object] timeout: 1
+ParkThread monitor-waited NamedLock[Parking blocker object] timed_out: true
 Testing monitor wait.
 Locker thread 2 for NamedLock[Lock testWait] start-monitor-wait NamedLock[Lock testWait] timeout: 0
 Locker thread 2 for NamedLock[Lock testWait] monitor-waited NamedLock[Lock testWait] timed_out: false
diff --git a/test/1931-monitor-events/jvm-expected.patch b/test/1931-monitor-events/jvm-expected.patch
new file mode 100644
index 0000000000..7595b145a2
--- /dev/null
+++ b/test/1931-monitor-events/jvm-expected.patch
@@ -0,0 +1,3 @@
+5,6d4
+< ParkThread start-monitor-wait NamedLock[Parking blocker object] timeout: 1
+< ParkThread monitor-waited NamedLock[Parking blocker object] timed_out: true
diff --git a/test/1931-monitor-events/src/art/Test1931.java b/test/1931-monitor-events/src/art/Test1931.java
index ccefede9f8..f5497893cb 100644
--- a/test/1931-monitor-events/src/art/Test1931.java
+++ b/test/1931-monitor-events/src/art/Test1931.java
@@ -23,6 +23,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.*;
+import java.util.concurrent.locks.LockSupport;
 import java.util.ListIterator;
 import java.util.function.Consumer;
 import java.util.function.Function;
@@ -67,6 +68,9 @@ public class Test1931 {
     System.out.println("Testing contended locking.");
     testLock(new Monitors.NamedLock("Lock testLock"));
 
+    System.out.println("Testing park.");
+    testPark(new Monitors.NamedLock("Parking blocker object"));
+
     System.out.println("Testing monitor wait.");
     testWait(new Monitors.NamedLock("Lock testWait"));
 
@@ -88,6 +92,14 @@ public class Test1931 {
     testInteruptWait(new Monitors.NamedLock("Lock testInteruptWait"));
   }
 
+  public static void testPark(Object blocker) throws Exception {
+    Thread holder = new Thread(() -> {
+      LockSupport.parkNanos(blocker, 10);  // Should round up to one millisecond
+    }, "ParkThread");
+    holder.start();
+    holder.join();
+  }
+
   public static void testInteruptWait(final Monitors.NamedLock lk) throws Exception {
     final Monitors.LockController controller1 = new Monitors.LockController(lk);
     controller1.DoLock();
diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
index 72725c1cd4..54d7847fdf 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
@@ -92,6 +92,17 @@ public class RemTest {
   /// CHECK:            cmp w{{\d+}}, #0x0
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:            cneg w{{\d+}}, w{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
+  /// CHECK:            Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shr
+  /// CHECK-NOT:        imul
+  /// CHECK:            mov
+  /// CHECK:            and
+  /// CHECK:            jz/eq
+  /// CHECK:            lea
+  /// CHECK:            test
+  /// CHECK:            cmovl/nge
   private static Integer $noinline$IntMod2(int v) {
     int r = v % 2;
     return r;
@@ -101,6 +112,17 @@ public class RemTest {
   /// CHECK:            cmp w{{\d+}}, #0x0
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:            cneg w{{\d+}}, w{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
+  /// CHECK:            Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shr
+  /// CHECK-NOT:        imul
+  /// CHECK:            mov
+  /// CHECK:            and
+  /// CHECK:            jz/eq
+  /// CHECK:            lea
+  /// CHECK:            test
+  /// CHECK:            cmovl/nge
   private static Integer $noinline$IntModMinus2(int v) {
     int r = v % -2;
     return r;
@@ -111,6 +133,17 @@ public class RemTest {
   /// CHECK:            cmp w{{\d+}}, #0x0
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:            csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
+  /// CHECK:            Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shr
+  /// CHECK-NOT:        imul
+  /// CHECK:            mov
+  /// CHECK:            and
+  /// CHECK:            jz/eq
+  /// CHECK:            lea
+  /// CHECK:            test
+  /// CHECK:            cmovl/nge
   private static Integer $noinline$IntMod16(int v) {
     int r = v % 16;
     return r;
@@ -121,6 +154,17 @@ public class RemTest {
   /// CHECK:            cmp w{{\d+}}, #0x0
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:            csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
+  /// CHECK:            Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shr
+  /// CHECK-NOT:        imul
+  /// CHECK:            mov
+  /// CHECK:            and
+  /// CHECK:            jz/eq
+  /// CHECK:            lea
+  /// CHECK:            test
+  /// CHECK:            cmovl/nge
   private static Integer $noinline$IntModMinus16(int v) {
     int r = v % -16;
     return r;
@@ -131,6 +175,17 @@ public class RemTest {
   /// CHECK:            cmp w{{\d+}}, #0x0
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:            and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:            csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
+  /// CHECK:            Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shr
+  /// CHECK-NOT:        imul
+  /// CHECK:            mov
+  /// CHECK:            and
+  /// CHECK:            jz/eq
+  /// CHECK:            lea
+  /// CHECK:            test
+  /// CHECK:            cmovl/nge
   private static Integer $noinline$IntModIntMin(int v) {
     int r = v % Integer.MIN_VALUE;
     return r;
@@ -211,6 +266,18 @@ public class RemTest {
   /// CHECK:            cmp x{{\d+}}, #0x0
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:            cneg x{{\d+}}, x{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod2(long) disassembly (after)
+  /// CHECK:            Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shrq
+  /// CHECK-NOT:        imulq
+  /// CHECK:            movq
+  /// CHECK:            andq
+  /// CHECK:            jz/eq
+  /// CHECK:            movq
+  /// CHECK:            sarq
+  /// CHECK:            shlq
+  /// CHECK:            orq
   private static Long $noinline$LongMod2(long v) {
     long r = v % 2;
     return r;
@@ -220,6 +287,18 @@ public class RemTest {
   /// CHECK:            cmp x{{\d+}}, #0x0
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:            cneg x{{\d+}}, x{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus2(long) disassembly (after)
+  /// CHECK:            Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shrq
+  /// CHECK-NOT:        imulq
+  /// CHECK:            movq
+  /// CHECK:            andq
+  /// CHECK:            jz/eq
+  /// CHECK:            movq
+  /// CHECK:            sarq
+  /// CHECK:            shlq
+  /// CHECK:            orq
   private static Long $noinline$LongModMinus2(long v) {
     long r = v % -2;
     return r;
@@ -230,6 +309,19 @@ public class RemTest {
   /// CHECK:            cmp x{{\d+}}, #0x0
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:            csneg x{{\d+}}, x{{\d+}}, mi
+
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod16(long) disassembly (after)
+  /// CHECK:            Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shrq
+  /// CHECK-NOT:        imulq
+  /// CHECK:            movq
+  /// CHECK:            andq
+  /// CHECK:            jz/eq
+  /// CHECK:            movq
+  /// CHECK:            sarq
+  /// CHECK:            shlq
+  /// CHECK:            orq
   private static Long $noinline$LongMod16(long v) {
     long r = v % 16;
     return r;
@@ -240,6 +332,18 @@ public class RemTest {
   /// CHECK:            cmp x{{\d+}}, #0x0
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:            csneg x{{\d+}}, x{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus16(long) disassembly (after)
+  /// CHECK:            Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shrq
+  /// CHECK-NOT:        imulq
+  /// CHECK:            movq
+  /// CHECK:            andq
+  /// CHECK:            jz/eq
+  /// CHECK:            movq
+  /// CHECK:            sarq
+  /// CHECK:            shlq
+  /// CHECK:            orq
   private static Long $noinline$LongModMinus16(long v) {
     long r = v % -16;
     return r;
@@ -250,6 +354,18 @@ public class RemTest {
   /// CHECK:            cmp x{{\d+}}, #0x0
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
   /// CHECK:            and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
   /// CHECK:            csneg x{{\d+}}, x{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModLongMin(long) disassembly (after)
+  /// CHECK:            Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:        imul
+  /// CHECK-NOT:        shrq
+  /// CHECK-NOT:        imulq
+  /// CHECK:            movq
+  /// CHECK:            andq
+  /// CHECK:            jz/eq
+  /// CHECK:            movq
+  /// CHECK:            sarq
+  /// CHECK:            shlq
+  /// CHECK:            orq
   private static Long $noinline$LongModLongMin(long v) {
     long r = v % Long.MIN_VALUE;
     return r;
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 3d70087b81..c1cc2e2318 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -81,6 +81,8 @@ elif [[ $mode == "target" ]]; then
     # These targets are needed for the chroot environment.
     make_command+=" crash_dump event-log-tags"
   fi
+  # Build the Runtime APEX.
+  make_command+=" com.android.runtime"
   mode_suffix="-target"
 fi
diff --git a/tools/hiddenapi/hiddenapi.cc b/tools/hiddenapi/hiddenapi.cc
index f61b3e8038..65a4945355 100644
--- a/tools/hiddenapi/hiddenapi.cc
+++ b/tools/hiddenapi/hiddenapi.cc
@@ -813,6 +813,7 @@ class DexFileEditor final {
     // Create a new MapItem entry with new MapList details.
     DexFile::MapItem new_item;
     new_item.type_ = old_item.type_;
+    new_item.unused_ = 0u;  // initialize to ensure dex output is deterministic (b/119308882)
     new_item.size_ = old_item.size_;
     new_item.offset_ = new_map_offset;
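The hiddenapi fix illustrates a general pattern: any struct serialized byte-for-byte must have every field, including padding or unused ones, initialized, or the output depends on leftover memory contents. A small illustration of the hazard, with a simplified stand-in for DexFile::MapItem (the layout here is assumed for the sketch):

#include <cstdint>
#include <cstring>

// Simplified stand-in for DexFile::MapItem: unused_ is alignment padding
// that still gets written to the output file.
struct MapItem {
  uint16_t type_;
  uint16_t unused_;
  uint32_t size_;
  uint32_t offset_;
};

int main() {
  MapItem item;       // Uninitialized: unused_ would hold stack garbage.
  item.type_ = 0x0001;
  item.unused_ = 0u;  // The fix: zero it so two builds produce byte-identical output.
  item.size_ = 4;
  item.offset_ = 0x70;
  // Serializing the raw struct would otherwise embed the garbage bytes.
  unsigned char out[sizeof(MapItem)];
  std::memcpy(out, &item, sizeof(MapItem));
  return 0;
}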