Use direct class pointers at allocation sites in the compiled code.

- Rather than looking up a class from its type ID (and checking whether
  it is resolved/initialized, resolving/initializing it if not), use
  direct class pointers where possible (boot-code-to-boot-class
  pointers and app-code-to-boot-class pointers).
- This results in a 1-2% speedup in Ritz MemAllocTest on Nexus 4.
- Embedding the object size (along with class pointers) caused a 1-2%
  slowdown in MemAllocTest and isn't implemented in this change.
- TODO: do the same for array allocations.
- TODO: when/if an application gets its own image, implement
  app-code-to-app-class pointers.
- Fix a -XX:gc bug.
  cf. https://android-review.googlesource.com/79460/
- Add /tmp/android-data/dalvik-cache to the list of locations from
  which clean-oat-host removes oat files.
  cf. https://android-review.googlesource.com/79550
- Add back a dropped UNLIKELY in FindMethodFromCode().
  cf. https://android-review.googlesource.com/74205

Bug: 9986565
Change-Id: I590b96bd21f7a7472f88e36752e675547559a5b1
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 29554c0..2ce7ecd 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -440,6 +440,20 @@
     PushPointer(code_buffer_, &id);
     data_lir = NEXT_LIR(data_lir);
   }
+  // Push class literals.
+  data_lir = class_literal_list_;
+  while (data_lir != NULL) {
+    uint32_t target = data_lir->operands[0];
+    cu_->compiler_driver->AddClassPatch(cu_->dex_file,
+                                        cu_->class_def_idx,
+                                        cu_->method_idx,
+                                        target,
+                                        code_buffer_.size());
+    const DexFile::TypeId& id = cu_->dex_file->GetTypeId(target);
+    // unique value based on target to ensure code deduplication works
+    PushPointer(code_buffer_, &id);
+    data_lir = NEXT_LIR(data_lir);
+  }
 }
 
 /* Write the switch tables to the output stream */
@@ -772,6 +786,7 @@
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset);
+  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset);
   return offset;
 }
 
@@ -960,6 +975,7 @@
     : Backend(arena),
       literal_list_(NULL),
       method_literal_list_(NULL),
+      class_literal_list_(NULL),
       code_literal_list_(NULL),
       first_fixup_(NULL),
       cu_(cu),
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3bd0298..daf21df 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -19,6 +19,7 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
+#include "mirror/object-inl.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -883,13 +884,53 @@
   // alloc will always check for resolution, do we also need to verify
   // access because the verifier was unable to?
   ThreadOffset func_offset(-1);
-  if (cu_->compiler_driver->CanAccessInstantiableTypeWithoutChecks(
-      cu_->method_idx, *cu_->dex_file, type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+  const DexFile* dex_file = cu_->dex_file;
+  CompilerDriver* driver = cu_->compiler_driver;
+  if (driver->CanAccessInstantiableTypeWithoutChecks(
+      cu_->method_idx, *dex_file, type_idx)) {
+    bool is_type_initialized;
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx,
+                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        // Use the literal pool and a PC-relative load from a data word.
+        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+        if (data_target == nullptr) {
+          data_target = AddWordData(&class_literal_list_, type_idx);
+        }
+        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
+        AppendLIR(load_pc_rel);
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        }
+      } else {
+        // Use the direct pointer.
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        }
+      }
+    } else {
+      // The slow path.
+      DCHECK_EQ(func_offset.Int32Value(), -1);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+      CallRuntimeHelperImmMethod(func_offset, type_idx, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
     func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectWithAccessCheck);
+    CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   RegLocation rl_result = GetReturn(false);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d942a24..f865207 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -142,6 +142,17 @@
   CallHelper(r_tgt, helper_offset, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);  // Result is handed to CallHelper() below.
+  DCHECK_NE(TargetReg(kArg1), arg0);  // kArg1 is the destination of the method load below.
+  if (TargetReg(kArg0) != arg0) {  // arg0 is a register; copy only if it is not kArg0 already.
+    OpRegCopy(TargetReg(kArg0), arg0);
+  }
+  LoadCurrMethodDirect(TargetReg(kArg1));  // Presumably the current method, going by the name.
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset, RegLocation arg0,
                                                       RegLocation arg1, bool safepoint_pc) {
   int r_tgt = CallHelperSetup(helper_offset);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c157327..f9d9e9e 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -529,6 +529,7 @@
                                  bool safepoint_pc);
     void CallRuntimeHelperImmMethod(ThreadOffset helper_offset, int arg0,
                                     bool safepoint_pc);
+    void CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc);
     void CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset,
                                                  RegLocation arg0, RegLocation arg1,
                                                  bool safepoint_pc);
@@ -855,6 +856,7 @@
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
+    LIR* class_literal_list_;                  // Class literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
     LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.