Merge "MIPS32: Add MSA instruction set feature option"
diff --git a/build/art.go b/build/art.go
index 9de2b05..f52c635 100644
--- a/build/art.go
+++ b/build/art.go
@@ -87,7 +87,7 @@
 				"-DART_STACK_OVERFLOW_GAP_arm64=8192",
 				"-DART_STACK_OVERFLOW_GAP_mips=16384",
 				"-DART_STACK_OVERFLOW_GAP_mips64=16384",
-				"-DART_STACK_OVERFLOW_GAP_x86=12288",
+				"-DART_STACK_OVERFLOW_GAP_x86=16384",
 				"-DART_STACK_OVERFLOW_GAP_x86_64=20480")
 	} else {
 		cflags = append(cflags,
diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
index e691a67..2572291 100644
--- a/compiler/dex/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -433,8 +433,11 @@
     // Native or abstract.
     return false;
   }
-  return AnalyseMethodCode(
-      code_item, method->ToMethodReference(), method->IsStatic(), method, result);
+  return AnalyseMethodCode(code_item,
+                           MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
+                           method->IsStatic(),
+                           method,
+                           result);
 }
 
 bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item,
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index a12d849..2283b39 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -33,6 +33,7 @@
 #include "base/enums.h"
 #include "base/length_prefixed_array.h"
 #include "base/macros.h"
+#include "class_table.h"
 #include "driver/compiler_driver.h"
 #include "image.h"
 #include "lock_word.h"
@@ -60,7 +61,6 @@
 }  // namespace mirror
 
 class ClassLoaderVisitor;
-class ClassTable;
 class ImtConflictTable;
 
 static constexpr int kInvalidFd = -1;
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 4731da1..ea3e9e5 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips/assembler_mips.h"
 #include "utils/mips/constants_mips.h"
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 00afbcd..2ecb1a3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips64/assembler_mips64.h"
 #include "utils/mips64/constants_mips64.h"
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 1a89567..832a7e1 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -288,6 +288,11 @@
   last_visited_latency_ = kArmIntegerOpLatency;
 }
 
+void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {  // Unused: the body only logs FATAL.
+  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
+}
+
 void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArmMulIntegerLatency;
 }
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index cb679fc..897e97d 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -103,6 +103,7 @@
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 558dcc4..83b487f 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "scheduler_arm64.h"
 #include "code_generator_utils.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
@@ -43,6 +44,13 @@
   last_visited_latency_ = kArm64IntegerOpLatency + 2;
 }
 
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+  // Although the generated code is a single `add` instruction, empirical results showed that
+  // keeping it apart from the memory accesses that use it is beneficial.
+  last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
+}
+
 void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArm64MulIntegerLatency;
 }
@@ -192,5 +200,148 @@
   }
 }
 
+void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation* instr) {
+  // Shared helper: the latency depends only on whether the packed type is floating point.
+  const bool is_fp = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_fp
+      ? kArm64SIMDFloatingPointOpLatency
+      : kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
+    HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDReplicateOpLatency;  // Fixed cost, independent of operands.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();  // No latency is modelled.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();  // No latency is modelled.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;  // Applied to every HVecCnv.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) {
+  if (instr->GetPackedType() == Primitive::kPrimBoolean) {  // Booleans add an internal op.
+    last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency;
+  }
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) {
+  // Pick the multiply latency from the packed element type.
+  const bool is_fp = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_fp
+      ? kArm64SIMDMulFloatingPointLatency
+      : kArm64SIMDMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) {
+  if (instr->GetPackedType() == Primitive::kPrimFloat) {
+    last_visited_latency_ = kArm64SIMDDivFloatLatency;
+  } else {
+    DCHECK(instr->GetPackedType() == Primitive::kPrimDouble);  // Only float/double expected.
+    last_visited_latency_ = kArm64SIMDDivDoubleLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;  // Integer-op cost for any packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();  // No latency is modelled.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;  // Integer-op cost for any packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;  // Integer-op cost for any packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);  // Latency chosen by FP vs. integer packed type.
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
+    HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDMulIntegerLatency;  // Same as an integer SIMD multiply.
+}
+
+void SchedulingLatencyVisitorARM64::HandleVecAddress(
+    HVecMemoryOperation* instruction,
+    size_t size ATTRIBUTE_UNUSED) {
+  // A non-constant index (input 1) is charged one extra shifter-op of internal latency.
+  if (!instruction->InputAt(1)->IsConstant()) {
+    last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
+  }
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) {
+  // Start from zero; the string-compression path and the address computation add to it.
+  last_visited_internal_latency_ = 0;
+  size_t size = Primitive::ComponentSize(instr->GetPackedType());
+
+  const bool is_string_char_load = instr->GetPackedType() == Primitive::kPrimChar &&
+      mirror::kUseStringCompression &&
+      instr->IsStringCharAt();
+  if (is_string_char_load) {
+    // Set latencies for the uncompressed case.
+    last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency;
+  }
+  // Address computation and the load latency are the same on both paths.
+  HandleVecAddress(instr, size);
+  last_visited_latency_ = kArm64SIMDMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
+  // Internal latency comes only from the address computation; the store has a fixed cost.
+  last_visited_internal_latency_ = 0;
+  HandleVecAddress(instr, Primitive::ComponentSize(instr->GetPackedType()));
+  last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 7a33720..63d5b7d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -42,6 +42,18 @@
 static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
 static constexpr uint32_t kArm64MulIntegerLatency = 6;
 static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
 
 class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
  public:
@@ -52,29 +64,54 @@
 
 // We add a second unused parameter to be able to use this macro like the others
 // defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
-  M(ArrayGet         , unused)                   \
-  M(ArrayLength      , unused)                   \
-  M(ArraySet         , unused)                   \
-  M(BinaryOperation  , unused)                   \
-  M(BoundsCheck      , unused)                   \
-  M(Div              , unused)                   \
-  M(InstanceFieldGet , unused)                   \
-  M(InstanceOf       , unused)                   \
-  M(Invoke           , unused)                   \
-  M(LoadString       , unused)                   \
-  M(Mul              , unused)                   \
-  M(NewArray         , unused)                   \
-  M(NewInstance      , unused)                   \
-  M(Rem              , unused)                   \
-  M(StaticFieldGet   , unused)                   \
-  M(SuspendCheck     , unused)                   \
-  M(TypeConversion   , unused)
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
+  M(ArrayGet             , unused)                   \
+  M(ArrayLength          , unused)                   \
+  M(ArraySet             , unused)                   \
+  M(BinaryOperation      , unused)                   \
+  M(BoundsCheck          , unused)                   \
+  M(Div                  , unused)                   \
+  M(InstanceFieldGet     , unused)                   \
+  M(InstanceOf           , unused)                   \
+  M(Invoke               , unused)                   \
+  M(LoadString           , unused)                   \
+  M(Mul                  , unused)                   \
+  M(NewArray             , unused)                   \
+  M(NewInstance          , unused)                   \
+  M(Rem                  , unused)                   \
+  M(StaticFieldGet       , unused)                   \
+  M(SuspendCheck         , unused)                   \
+  M(TypeConversion       , unused)                   \
+  M(VecReplicateScalar   , unused)                   \
+  M(VecSetScalars        , unused)                   \
+  M(VecSumReduce         , unused)                   \
+  M(VecCnv               , unused)                   \
+  M(VecNeg               , unused)                   \
+  M(VecAbs               , unused)                   \
+  M(VecNot               , unused)                   \
+  M(VecAdd               , unused)                   \
+  M(VecHalvingAdd        , unused)                   \
+  M(VecSub               , unused)                   \
+  M(VecMul               , unused)                   \
+  M(VecDiv               , unused)                   \
+  M(VecMin               , unused)                   \
+  M(VecMax               , unused)                   \
+  M(VecAnd               , unused)                   \
+  M(VecAndNot            , unused)                   \
+  M(VecOr                , unused)                   \
+  M(VecXor               , unused)                   \
+  M(VecShl               , unused)                   \
+  M(VecShr               , unused)                   \
+  M(VecUShr              , unused)                   \
+  M(VecMultiplyAccumulate, unused)                   \
+  M(VecLoad              , unused)                   \
+  M(VecStore             , unused)
 
 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
@@ -85,6 +122,10 @@
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleSimpleArithmeticSIMD(HVecOperation *instr);
+  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
 };
 
 class HSchedulerARM64 : public HScheduler {
@@ -101,6 +142,8 @@
         return true;
       FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
         return true;
+      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
+        return true;
       default:
         return HScheduler::IsSchedulable(instruction);
     }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 53e73c3..dcdf3bc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -74,6 +74,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
@@ -477,6 +478,16 @@
                                        android::base::LogId::DEFAULT,
                                        LogSeverity::FATAL,
                                        message.c_str());
+    // If we're on the host, try to dump all threads to get a sense of what's going on. This is
+    // restricted to the host as the dump may itself go bad.
+    // TODO: Use a double watchdog timeout, so we can enable this on-device.
+    if (!kIsTargetBuild && Runtime::Current() != nullptr) {
+      Runtime::Current()->AttachCurrentThread("Watchdog thread attached for dumping",
+                                              true,
+                                              nullptr,
+                                              false);
+      Runtime::Current()->DumpForSigQuit(std::cerr);
+    }
     exit(1);
   }
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 8567c00..d1afcb8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -27,6 +27,7 @@
 #include "dex_file_annotations.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
+#include "invoke_type.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 856bfd2..d8dfdd7 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -24,19 +24,16 @@
 #include "base/enums.h"
 #include "dex_file.h"
 #include "gc_root.h"
-#include "invoke_type.h"
-#include "method_reference.h"
 #include "modifiers.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object.h"
 #include "obj_ptr.h"
+#include "offsets.h"
 #include "read_barrier_option.h"
-#include "utils.h"
 
 namespace art {
 
 template<class T> class Handle;
 class ImtConflictTable;
+enum InvokeType : uint32_t;
 union JValue;
 class OatQuickMethodHeader;
 class ProfilingInfo;
@@ -47,8 +44,13 @@
 namespace mirror {
 class Array;
 class Class;
+class ClassLoader;
+class DexCache;
 class IfTable;
+class Object;
+template <typename MirrorType> class ObjectArray;
 class PointerArray;
+class String;
 }  // namespace mirror
 
 class ArtMethod FINAL {
@@ -318,11 +320,11 @@
   }
 
   static MemberOffset DexMethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, dex_method_index_));
   }
 
   static MemberOffset MethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, method_index_));
   }
 
   uint32_t GetCodeItemOffset() {
@@ -524,10 +526,6 @@
 
   bool IsImtUnimplementedMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  MethodReference ToMethodReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return MethodReference(GetDexFile(), GetDexMethodIndex());
-  }
-
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index b28eb72..be20920 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -198,7 +198,7 @@
                                                         kFractionalDigits)
      << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " "
      << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: "
-     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << "\n";
+     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << std::endl;
 }
 
 template <class Value>
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index aab9839..a81c832 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -109,6 +109,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "trace.h"
+#include "utf.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
@@ -3982,6 +3983,12 @@
 }
 
 mirror::Class* ClassLinker::LookupClass(Thread* self,
+                                        const char* descriptor,
+                                        ObjPtr<mirror::ClassLoader> class_loader) {
+  return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
+}
+
+mirror::Class* ClassLinker::LookupClass(Thread* self,
                                         const char* descriptor,
                                         size_t hash,
                                         ObjPtr<mirror::ClassLoader> class_loader) {
@@ -4604,7 +4611,10 @@
   DCHECK(out != nullptr);
   out->CopyFrom(proxy_constructor, image_pointer_size_);
   // Make this constructor public and fix the class to be our Proxy version
-  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) | kAccPublic);
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) |
+                      kAccPublic |
+                      kAccCompileDontBother);
   out->SetDeclaringClass(klass.Get());
 }
 
@@ -4638,7 +4648,8 @@
   // preference to the invocation handler.
   const uint32_t kRemoveFlags = kAccAbstract | kAccDefault | kAccDefaultConflict;
   // Make the method final.
-  const uint32_t kAddFlags = kAccFinal;
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  const uint32_t kAddFlags = kAccFinal | kAccCompileDontBother;
   out->SetAccessFlags((out->GetAccessFlags() & ~kRemoveFlags) | kAddFlags);
 
   // Clear the dex_code_item_offset_. It needs to be 0 since proxy methods have no CodeItems but the
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index fad6e9e..1e8125e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -212,9 +212,7 @@
                              const char* descriptor,
                              ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
   void LookupClasses(const char* descriptor, std::vector<ObjPtr<mirror::Class>>& classes)
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index 35fce40..b15d82f 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_CLASS_TABLE_INL_H_
 
 #include "class_table.h"
+
+#include "gc_root-inl.h"
 #include "oat_file.h"
 
 namespace art {
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 0891d3f..b71610a 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "class_table.h"
+#include "class_table-inl.h"
 
 #include "mirror/class-inl.h"
 #include "oat_file.h"
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 591ba42..3de78ed 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -28,7 +28,6 @@
 #include "invoke_type.h"
 #include "jni.h"
 #include "modifiers.h"
-#include "utf.h"
 
 namespace art {
 
diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h
index eb00472..d039d88 100644
--- a/runtime/gc/accounting/bitmap.h
+++ b/runtime/gc/accounting/bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 76247bc..7097f87 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "base/logging.h"
-#include "object_callbacks.h"
 #include "space_bitmap.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index 5594781..c332f96 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "globals.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <set>
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index b136488..889f57b 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
@@ -35,6 +34,9 @@
 }  // namespace mirror
 class MemMap;
 
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 namespace accounting {
 
diff --git a/runtime/gc/allocation_listener.h b/runtime/gc/allocation_listener.h
index d694a68..21fa214 100644
--- a/runtime/gc/allocation_listener.h
+++ b/runtime/gc/allocation_listener.h
@@ -23,7 +23,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 122f779..2257b81 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "base/stl_util.h"
 #include "obj_ptr-inl.h"
+#include "object_callbacks.h"
 #include "stack.h"
 
 #ifdef ART_TARGET_ANDROID
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 227c7ad..d31e442 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -22,12 +22,12 @@
 
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Thread;
 
 namespace mirror {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index f8ca8db..7b4340e 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -21,9 +21,7 @@
 #include "garbage_collector.h"
 #include "immune_spaces.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "offsets.h"
-#include "mirror/object.h"
 #include "mirror/object_reference.h"
 #include "safe_map.h"
 
@@ -34,6 +32,10 @@
 class Closure;
 class RootInfo;
 
+namespace mirror {
+class Object;
+}  // namespace mirror
+
 namespace gc {
 
 namespace accounting {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 85727c2..0bf4095 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -28,7 +28,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 5a9b9f8..b9e06f9 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -27,7 +27,6 @@
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 9d6e74d..d3858ba 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -27,7 +27,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "mirror/object_reference.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 24f4ce2..0289250 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -34,7 +34,6 @@
 #include "globals.h"
 #include "handle.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
 #include "safe_map.h"
@@ -43,6 +42,7 @@
 namespace art {
 
 class ConditionVariable;
+class IsMarkedVisitor;
 class Mutex;
 class RootVisitor;
 class StackVisitor;
@@ -51,6 +51,9 @@
 class TimingLogger;
 class VariableSizedHandleScope;
 
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace mirror {
   class Class;
   class Object;
diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc
index 2cdfc16..a307c51 100644
--- a/runtime/gc/heap_verification_test.cc
+++ b/runtime/gc/heap_verification_test.cc
@@ -17,7 +17,7 @@
 #include "common_runtime_test.h"
 
 #include "base/memory_tool.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "handle_scope-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/gc/reference_processor-inl.h b/runtime/gc/reference_processor-inl.h
index f619a15..0f47d3d 100644
--- a/runtime/gc/reference_processor-inl.h
+++ b/runtime/gc/reference_processor-inl.h
@@ -19,6 +19,8 @@
 
 #include "reference_processor.h"
 
+#include "mirror/reference-inl.h"
+
 namespace art {
 namespace gc {
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 886c950..52da763 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 #include "reference_processor-inl.h"
 #include "reflection.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 38b68cb..a8135d9 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -20,11 +20,11 @@
 #include "base/mutex.h"
 #include "globals.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "reference_queue.h"
 
 namespace art {
 
+class IsMarkedVisitor;
 class TimingLogger;
 
 namespace mirror {
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index fd5dcf9..321d22a 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index b73a880..c48d48c 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -27,7 +27,6 @@
 #include "globals.h"
 #include "jni.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "thread_pool.h"
 
@@ -36,6 +35,9 @@
 class Reference;
 }  // namespace mirror
 
+class IsMarkedVisitor;
+class MarkObjectVisitor;
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index e9982e9..566dc5d 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -17,10 +17,17 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 
-#include "object_callbacks.h"
 #include "space.h"
 
 namespace art {
+
+namespace mirror {
+class Object;
+}  // namespace mirror
+
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 9282ec7..7ec54f5 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -26,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 27f30e0..8d8c488 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -255,13 +255,28 @@
   MutexLock mu(Thread::Current(), region_lock_);
   VerifyNonFreeRegionLimit();
   size_t new_non_free_region_index_limit = 0;
+
+  // Batch zeroing and releasing of pages so that madvise is called less often. This helps
+  // reduce contention on the mmap semaphore. b/62194020
+  // clear_region clears a region without releasing its pages and appends it to the pending
+  // block. If the region does not start at the block's end, the block is zeroed/released first.
+  uint8_t* clear_block_begin = nullptr;  // Start of the pending block (null until first region).
+  uint8_t* clear_block_end = nullptr;  // End of the pending block; length is end - begin.
+  auto clear_region = [&clear_block_begin, &clear_block_end](Region* r) {
+    r->Clear(/*zero_and_release_pages*/false);  // Page release is deferred and batched.
+    if (clear_block_end != r->Begin()) {  // Not contiguous with the pending block.
+      ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
+      clear_block_begin = r->Begin();  // Start a new pending block at this region.
+    }
+    clear_block_end = r->End();  // Extend (or initialize) the pending block to cover r.
+  };
   for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) {
     Region* r = &regions_[i];
     if (r->IsInFromSpace()) {
       *cleared_bytes += r->BytesAllocated();
       *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
-      r->Clear();
+      clear_region(r);
     } else if (r->IsInUnevacFromSpace()) {
       if (r->LiveBytes() == 0) {
         // Special case for 0 live bytes, this means all of the objects in the region are dead and
@@ -274,13 +289,13 @@
         // Also release RAM for large tails.
         while (i + free_regions < num_regions_ && regions_[i + free_regions].IsLargeTail()) {
           DCHECK(r->IsLarge());
-          regions_[i + free_regions].Clear();
+          clear_region(&regions_[i + free_regions]);
           ++free_regions;
         }
         *cleared_bytes += r->BytesAllocated();
         *cleared_objects += r->ObjectsAllocated();
         num_non_free_regions_ -= free_regions;
-        r->Clear();
+        clear_region(r);
         GetLiveBitmap()->ClearRange(
             reinterpret_cast<mirror::Object*>(r->Begin()),
             reinterpret_cast<mirror::Object*>(r->Begin() + free_regions * kRegionSize));
@@ -317,6 +332,8 @@
                                                  last_checked_region->Idx() + 1);
     }
   }
+  // Clear pages for the last block since clearing happens when a new block opens.
+  ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
   // Update non_free_region_index_limit_.
   SetNonFreeRegionLimit(new_non_free_region_index_limit);
   evac_region_ = nullptr;
@@ -369,7 +386,7 @@
     if (!r->IsFree()) {
       --num_non_free_regions_;
     }
-    r->Clear();
+    r->Clear(/*zero_and_release_pages*/true);
   }
   SetNonFreeRegionLimit(0);
   current_region_ = &full_region_;
@@ -395,7 +412,7 @@
     } else {
       DCHECK(reg->IsLargeTail());
     }
-    reg->Clear();
+    reg->Clear(/*zero_and_release_pages*/true);
     --num_non_free_regions_;
   }
   if (end_addr < Limit()) {
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 1d1d27e..323ccdb 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -284,14 +284,16 @@
       return type_;
     }
 
-    void Clear() {
+    void Clear(bool zero_and_release_pages) {
       top_.StoreRelaxed(begin_);
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
       objects_allocated_.StoreRelaxed(0);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
-      ZeroAndReleasePages(begin_, end_ - begin_);
+      if (zero_and_release_pages) {
+        ZeroAndReleasePages(begin_, end_ - begin_);
+      }
       is_newly_allocated_ = false;
       is_a_tlab_ = false;
       thread_ = nullptr;
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 8d8b745..9e900e4 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -24,6 +24,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 79d6201..6d52d95 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -28,7 +28,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "read_barrier_option.h"
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 3e19146..2bac231 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -27,6 +27,8 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string-inl.h"
+#include "object_callbacks.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utf.h"
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 68454fb..2ec03be 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -25,10 +25,11 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "gc/weak_root_state.h"
-#include "object_callbacks.h"
 
 namespace art {
 
+class IsMarkedVisitor;
+
 namespace gc {
 namespace space {
 class ImageSpace;
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 311515c..bb27b34 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -23,6 +23,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
+#include "utf.h"
 
 namespace art {
 
diff --git a/runtime/invoke_type.h b/runtime/invoke_type.h
index de07c72..a003f7f 100644
--- a/runtime/invoke_type.h
+++ b/runtime/invoke_type.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-enum InvokeType {
+enum InvokeType : uint32_t {
   kStatic,     // <<static>>
   kDirect,     // <<direct>>
   kVirtual,    // <<virtual>>
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 4137633..2ad3b29 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "jni_internal.h"
+#include "java_vm_ext.h"
 
 #include <dlfcn.h>
 
@@ -30,11 +30,12 @@
 #include "fault_handler.h"
 #include "gc_root-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
 #include "nativeloader/native_loader.h"
-#include "java_vm_ext.h"
+#include "object_callbacks.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
 #include "runtime_options.h"
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 7374920..50aabdc 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -32,6 +32,7 @@
 }  // namespace mirror
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Libraries;
 class ParsedOptions;
 class Runtime;
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 75f9b0a..f898d41 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -23,7 +23,6 @@
 #include "base/timing_logger.h"
 #include "jit/profile_saver_options.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "thread_pool.h"
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 388a517..0cafac7 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -35,6 +35,7 @@
 #include "mem_map.h"
 #include "oat_file-inl.h"
 #include "oat_quick_method_header.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread_list.h"
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index eea2771..9ecc876 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -29,7 +29,6 @@
 #include "jni.h"
 #include "method_reference.h"
 #include "oat_file.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -39,6 +38,7 @@
 class ArtMethod;
 class LinearAlloc;
 class InlineCache;
+class IsMarkedVisitor;
 class OatQuickMethodHeader;
 class ProfilingInfo;
 
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 60e4295..af933ae 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -22,7 +22,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
-#include "object_callbacks.h"
 #include "obj_ptr.h"
 #include "reference_table.h"
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6c39361..12793e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -952,6 +952,9 @@
 }
 
 void ZeroAndReleasePages(void* address, size_t length) {
+  if (length == 0) {
+    return;
+  }
   uint8_t* const mem_begin = reinterpret_cast<uint8_t*>(address);
   uint8_t* const mem_end = mem_begin + length;
   uint8_t* const page_begin = AlignUp(mem_begin, kPageSize);
diff --git a/runtime/mirror/accessible_object.h b/runtime/mirror/accessible_object.h
index 2581ac2..a217193 100644
--- a/runtime/mirror/accessible_object.h
+++ b/runtime/mirror/accessible_object.h
@@ -20,7 +20,6 @@
 #include "class.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread.h"
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 51d9d24..7287a92 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -22,7 +22,6 @@
 #include "gc/allocator_type.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5122b37..c8d4557 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -23,13 +23,14 @@
 #include "art_method.h"
 #include "base/array_slice.h"
 #include "base/length_prefixed_array.h"
-#include "class_linker-inl.h"
+#include "class_linker.h"
 #include "class_loader.h"
 #include "common_throws.h"
+#include "dex_cache.h"
 #include "dex_file-inl.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "object_array-inl.h"
+#include "object_array.h"
 #include "object-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index dfb2788..dfdd162 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -29,7 +29,6 @@
 #include "modifiers.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 #include "read_barrier_option.h"
 #include "stride_iterator.h"
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index 708665d..75a3800 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -23,7 +23,6 @@
 #include "gc_root.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "string.h"
 
 namespace art {
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index f5ecdae..39c8ee0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -19,9 +19,7 @@
 
 #include "class_loader.h"
 
-#include "base/mutex-inl.h"
 #include "class_table-inl.h"
-#include "obj_ptr-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 6e1f44b..381d96b 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -17,7 +17,10 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 #define ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 
+#include "base/mutex.h"
 #include "object.h"
+#include "object_reference.h"
+#include "obj_ptr.h"
 
 namespace art {
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index c95d92e..96e3475 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -23,6 +23,7 @@
 #include "gc/heap.h"
 #include "globals.h"
 #include "linear_alloc.h"
+#include "oat_file.h"
 #include "object.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
diff --git a/runtime/mirror/executable.h b/runtime/mirror/executable.h
index 6c465f6..8a28f66 100644
--- a/runtime/mirror/executable.h
+++ b/runtime/mirror/executable.h
@@ -20,7 +20,6 @@
 #include "accessible_object.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index 222d709..40186a6 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -22,7 +22,6 @@
 #include "gc_root.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index d3fc95f..95f829d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -26,8 +26,7 @@
 #include "class-inl.h"
 #include "class_flags.h"
 #include "class_linker.h"
-#include "class_loader-inl.h"
-#include "dex_cache-inl.h"
+#include "dex_cache.h"
 #include "lock_word-inl.h"
 #include "monitor.h"
 #include "object_array-inl.h"
diff --git a/runtime/mirror/object-refvisitor-inl.h b/runtime/mirror/object-refvisitor-inl.h
index 49ab7c2..f5ab4dd 100644
--- a/runtime/mirror/object-refvisitor-inl.h
+++ b/runtime/mirror/object-refvisitor-inl.h
@@ -19,7 +19,9 @@
 
 #include "object-inl.h"
 
+#include "class_loader-inl.h"
 #include "class-refvisitor-inl.h"
+#include "dex_cache-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index f8de6e6..84e5494 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -19,6 +19,7 @@
 
 #include "reference.h"
 
+#include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
 #include "runtime.h"
 
@@ -48,6 +49,12 @@
   return SetFieldObjectVolatile<kTransactionActive>(ZombieOffset(), zombie);
 }
 
+template<ReadBarrierOption kReadBarrierOption>  // Forwarded to the root's Read<>() below.
+inline Class* Reference::GetJavaLangRefReference() {
+  DCHECK(!java_lang_ref_Reference_.IsNull());  // The root must be non-null before it is read.
+  return java_lang_ref_Reference_.Read<kReadBarrierOption>();
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index cfcbd5a..b10c294 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
 #include "base/enums.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "class.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
@@ -97,10 +99,7 @@
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  static Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(!java_lang_ref_Reference_.IsNull());
-    return java_lang_ref_Reference_.Read<kReadBarrierOption>();
-  }
+  static ALWAYS_INLINE Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_);
   static void SetClass(ObjPtr<Class> klass);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index c00cf91..53de821 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -19,6 +19,7 @@
 #include "class.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "object-inl.h"
 #include "handle_scope-inl.h"
 #include "string.h"
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index d32d8dc..87e8a1f 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -19,7 +19,6 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index de0e75b..80745d2 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -18,8 +18,10 @@
 
 #include "arch/memcmp16.h"
 #include "array.h"
+#include "base/array_ref.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index b59bbfb..7fbe8bd 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -20,7 +20,6 @@
 #include "gc_root.h"
 #include "gc/allocator_type.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e50409f..7027410 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -26,7 +26,9 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
+#include "object_callbacks.h"
 #include "stack_trace_element.h"
+#include "string.h"
 #include "utils.h"
 #include "well_known_classes.h"
 
@@ -169,5 +171,17 @@
   java_lang_Throwable_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+Object* Throwable::GetStackState() {  // Volatile read of the backtrace_ field.
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+Object* Throwable::GetStackTrace() {  // NOTE(review): same field as GetStackState; confirm.
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+String* Throwable::GetDetailMessage() {  // Non-volatile read of the detail_message_ field.
+  return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0a4ab6f..fb45228 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -19,23 +19,22 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
-#include "string.h"
 
 namespace art {
 
+class RootVisitor;
 struct ThrowableOffsets;
 
 namespace mirror {
 
+class String;
+
 // C++ mirror of java.lang.Throwable
 class MANAGED Throwable : public Object {
  public:
   void SetDetailMessage(ObjPtr<String> new_detail_message) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
-  }
+  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::string Dump() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -59,12 +58,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
-  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
+  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_);
+  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Object> backtrace_;  // Note this is Java volatile:
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f94edcd..a617818 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -31,6 +31,7 @@
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread.h"
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 6dc706f..96c5a5b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,13 +30,13 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread_state.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class LockWord;
 template<class T> class Handle;
 class StackVisitor;
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 8423e04..010c6f8 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -26,7 +26,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 5094189..95aba79 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -165,7 +165,7 @@
   if (dump_native_stack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
-  os << "\n";
+  os << std::endl;
 }
 
 void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
@@ -216,11 +216,10 @@
       ScopedObjectAccess soa(self);
       thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
     }
-    local_os << "\n";
     {
       // Use the logging lock to ensure serialization when writing to the common ostream.
       MutexLock mu(self, *Locks::logging_lock_);
-      *os_ << local_os.str();
+      *os_ << local_os.str() << std::endl;
     }
     barrier_.Pass(self);
   }
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 56ff0a1..907d37e 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -19,8 +19,10 @@
 #include "base/stl_util.h"
 #include "base/logging.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 921de03..747c2d0 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -22,7 +22,6 @@
 #include "base/value_object.h"
 #include "dex_file_types.h"
 #include "gc_root.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "primitive.h"
 #include "safe_map.h"
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 25baac5..6c01a79 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -30,7 +30,6 @@
 #include "gc_root.h"
 #include "handle_scope.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 
 namespace art {
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index ca52a99..7677025 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -610,7 +610,7 @@
   if [ "$HOST" != "n" ]; then
     # Use SIGRTMIN+2 to try to dump threads.
     # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
-    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
+    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 90s ${dex2oat_cmdline} --watchdog-timeout=60000"
   fi
   if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"