Use intrinsic codegen for compiling intrinsic methods.

When compiling an intrinsic method, generate a graph that
invokes the same method and try to compile it. If the call
is actually intrinsified (or simplified to other HIR) and
yields a leaf method, use the result of this compilation
attempt; otherwise, compile the actual code or JNI stub.

Note that CodeGenerator::CreateThrowingSlowPathLocations()
actually marks the locations as kNoCall if the throw is not
in a catch block, thus considering some throwing methods
(for example, String.charAt()) as leaf methods.

We would ideally want to use the intrinsic codegen for all
intrinsics that do not generate a slow-path call to the
default implementation. Relying on the leaf-method check is
suboptimal as we're missing out on methods that do other
types of calls, for example runtime calls. This shall be
fixed in a subsequent CL.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 67717501
Change-Id: I640fda7c22d4ff494b5ff77ebec3b7f5f75af652
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index fc6a717..e413718 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -26,8 +26,8 @@
                            InstructionSet instruction_set,
                            const ArrayRef<const uint8_t>& quick_code)
     : compiler_driver_(compiler_driver),
-      instruction_set_(instruction_set),
-      quick_code_(compiler_driver_->GetCompiledMethodStorage()->DeduplicateCode(quick_code)) {
+      quick_code_(compiler_driver_->GetCompiledMethodStorage()->DeduplicateCode(quick_code)),
+      packed_fields_(InstructionSetField::Encode(instruction_set)) {
 }
 
 CompiledCode::~CompiledCode() {
@@ -48,7 +48,7 @@
 }
 
 size_t CompiledCode::AlignCode(size_t offset) const {
-  return AlignCode(offset, instruction_set_);
+  return AlignCode(offset, GetInstructionSet());
 }
 
 size_t CompiledCode::AlignCode(size_t offset, InstructionSet instruction_set) {
@@ -56,7 +56,7 @@
 }
 
 size_t CompiledCode::CodeDelta() const {
-  return CodeDelta(instruction_set_);
+  return CodeDelta(GetInstructionSet());
 }
 
 size_t CompiledCode::CodeDelta(InstructionSet instruction_set) {
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 892bc59..acdce26 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -22,6 +22,8 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/bit_field.h"
+#include "base/bit_utils.h"
 
 namespace art {
 
@@ -44,7 +46,7 @@
   virtual ~CompiledCode();
 
   InstructionSet GetInstructionSet() const {
-    return instruction_set_;
+    return GetPackedField<InstructionSetField>();
   }
 
   ArrayRef<const uint8_t> GetQuickCode() const;
@@ -68,6 +70,11 @@
   static const void* CodePointer(const void* code_pointer, InstructionSet instruction_set);
 
  protected:
+  static constexpr size_t kInstructionSetFieldSize =
+      MinimumBitsToStore(static_cast<size_t>(InstructionSet::kLast));
+  static constexpr size_t kNumberOfCompiledCodePackedBits = kInstructionSetFieldSize;
+  static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
+
   template <typename T>
   static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array);
 
@@ -75,13 +82,26 @@
     return compiler_driver_;
   }
 
+  template <typename BitFieldType>
+  typename BitFieldType::value_type GetPackedField() const {
+    return BitFieldType::Decode(packed_fields_);
+  }
+
+  template <typename BitFieldType>
+  void SetPackedField(typename BitFieldType::value_type value) {
+    DCHECK(IsUint<BitFieldType::size>(static_cast<uintptr_t>(value)));
+    packed_fields_ = BitFieldType::Update(value, packed_fields_);
+  }
+
  private:
+  using InstructionSetField = BitField<InstructionSet, 0u, kInstructionSetFieldSize>;
+
   CompilerDriver* const compiler_driver_;
 
-  const InstructionSet instruction_set_;
-
-  // Used to store the PIC code for Quick.
+  // Used to store the compiled code.
   const LengthPrefixedArray<uint8_t>* const quick_code_;
+
+  uint32_t packed_fields_;
 };
 
 class CompiledMethod FINAL : public CompiledCode {
@@ -116,6 +136,18 @@
 
   static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m);
 
+  bool IsIntrinsic() const {
+    return GetPackedField<IsIntrinsicField>();
+  }
+
+  // Marks the compiled method as having been generated by the intrinsic codegen.
+  // Such methods have no relationship to their code items, which affects the
+  // debug information generated at link time. May be called at most once (DCHECK).
+  void MarkAsIntrinsic() {
+    DCHECK(!IsIntrinsic());
+    SetPackedField<IsIntrinsicField>(/* value */ true);
+  }
+
   size_t GetFrameSizeInBytes() const {
     return frame_size_in_bytes_;
   }
@@ -137,6 +169,14 @@
   ArrayRef<const linker::LinkerPatch> GetPatches() const;
 
  private:
+  static constexpr size_t kIsIntrinsicLsb = kNumberOfCompiledCodePackedBits;
+  static constexpr size_t kIsIntrinsicSize = 1u;
+  static constexpr size_t kNumberOfCompiledMethodPackedBits = kIsIntrinsicLsb + kIsIntrinsicSize;
+  static_assert(kNumberOfCompiledMethodPackedBits <= CompiledCode::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
+  using IsIntrinsicField = BitField<bool, kIsIntrinsicLsb, kIsIntrinsicSize>;
+
   // For quick code, the size of the activation used by the code.
   const size_t frame_size_in_bytes_;
   // For quick code, a bit mask describing spilled GPR callee-save registers.
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3aa84f8..85abd66 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -65,7 +65,8 @@
 
   virtual CompiledMethod* JniCompile(uint32_t access_flags,
                                      uint32_t method_idx,
-                                     const DexFile& dex_file) const = 0;
+                                     const DexFile& dex_file,
+                                     Handle<mirror::DexCache> dex_cache) const = 0;
 
   virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
                           jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 2b61727..37c2d32 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -202,7 +202,7 @@
       // Decode dex register locations for all stack maps.
       // It might be expensive, so do it just once and reuse the result.
       std::vector<DexRegisterMap> dex_reg_maps;
-      if (mi->code_info != nullptr) {
+      if (dex_code != nullptr && mi->code_info != nullptr) {
         const CodeInfo code_info(mi->code_info);
         CodeInfoEncoding encoding = code_info.ExtractEncoding();
         for (size_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f4700d4..726401d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -516,7 +516,8 @@
       access_flags |= annotations::GetNativeMethodAnnotationAccessFlags(
           dex_file, dex_file.GetClassDef(class_def_idx), method_idx);
 
-      compiled_method = driver->GetCompiler()->JniCompile(access_flags, method_idx, dex_file);
+      compiled_method = driver->GetCompiler()->JniCompile(
+          access_flags, method_idx, dex_file, dex_cache);
       CHECK(compiled_method != nullptr);
     }
   } else if ((access_flags & kAccAbstract) != 0) {
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 2432f13..a6687fe 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -40,20 +40,20 @@
   // Create the first block for the dex instructions, single successor of the entry block.
   MaybeCreateBlockAt(0u);
 
-  if (code_item_.tries_size_ != 0) {
+  if (code_item_->tries_size_ != 0) {
     // Create branch targets at the start/end of the TryItem range. These are
     // places where the program might fall through into/out of the a block and
     // where TryBoundary instructions will be inserted later. Other edges which
     // enter/exit the try blocks are a result of branches/switches.
-    for (size_t idx = 0; idx < code_item_.tries_size_; ++idx) {
-      const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item_, idx);
+    for (size_t idx = 0; idx < code_item_->tries_size_; ++idx) {
+      const DexFile::TryItem* try_item = DexFile::GetTryItems(*code_item_, idx);
       uint32_t dex_pc_start = try_item->start_addr_;
       uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
       MaybeCreateBlockAt(dex_pc_start);
-      if (dex_pc_end < code_item_.insns_size_in_code_units_) {
+      if (dex_pc_end < code_item_->insns_size_in_code_units_) {
         // TODO: Do not create block if the last instruction cannot fall through.
         MaybeCreateBlockAt(dex_pc_end);
-      } else if (dex_pc_end == code_item_.insns_size_in_code_units_) {
+      } else if (dex_pc_end == code_item_->insns_size_in_code_units_) {
         // The TryItem spans until the very end of the CodeItem and therefore
         // cannot have any code afterwards.
       } else {
@@ -63,7 +63,7 @@
     }
 
     // Create branch targets for exception handlers.
-    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(*code_item_, 0);
     uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
     for (uint32_t idx = 0; idx < handlers_size; ++idx) {
       CatchHandlerIterator iterator(handlers_ptr);
@@ -76,7 +76,7 @@
 
   // Iterate over all instructions and find branching instructions. Create blocks for
   // the locations these instructions branch to.
-  IterationRange<DexInstructionIterator> instructions = code_item_.Instructions();
+  IterationRange<DexInstructionIterator> instructions = code_item_->Instructions();
   for (const DexInstructionPcPair& pair : instructions) {
     const uint32_t dex_pc = pair.DexPc();
     const Instruction& instruction = pair.Inst();
@@ -127,7 +127,7 @@
   bool is_throwing_block = false;
   // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to
   // calculate in dex_pc order.
-  for (const DexInstructionPcPair& pair : code_item_.Instructions()) {
+  for (const DexInstructionPcPair& pair : code_item_->Instructions()) {
     const uint32_t dex_pc = pair.DexPc();
     const Instruction& instruction = pair.Inst();
 
@@ -229,7 +229,7 @@
     }
   }
 
-  const Instruction& first = code_item_.InstructionAt(catch_block->GetDexPc());
+  const Instruction& first = code_item_->InstructionAt(catch_block->GetDexPc());
   if (first.Opcode() == Instruction::MOVE_EXCEPTION) {
     // Verifier guarantees that if a catch block begins with MOVE_EXCEPTION then
     // it has no live normal predecessors.
@@ -247,7 +247,7 @@
 }
 
 void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
-  if (code_item_.tries_size_ == 0) {
+  if (code_item_->tries_size_ == 0) {
     return;
   }
 
@@ -269,12 +269,12 @@
     // loop for synchronized blocks.
     if (ContainsElement(throwing_blocks_, block)) {
       // Try to find a TryItem covering the block.
-      const int32_t try_item_idx = DexFile::FindTryItem(DexFile::GetTryItems(code_item_, 0u),
-                                                        code_item_.tries_size_,
+      const int32_t try_item_idx = DexFile::FindTryItem(DexFile::GetTryItems(*code_item_, 0u),
+                                                        code_item_->tries_size_,
                                                         block->GetDexPc());
       if (try_item_idx != -1) {
         // Block throwing and in a TryItem. Store the try block information.
-        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(code_item_, try_item_idx));
+        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(*code_item_, try_item_idx));
       }
     }
   }
@@ -285,7 +285,7 @@
 
   // Iterate over catch blocks, create artifical landing pads if necessary to
   // simplify the CFG, and set metadata.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(*code_item_, 0);
   uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
   for (uint32_t idx = 0; idx < handlers_size; ++idx) {
     CatchHandlerIterator iterator(handlers_ptr);
@@ -333,7 +333,7 @@
         HTryBoundary* try_entry = new (allocator_) HTryBoundary(
             HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
         try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
+        LinkToCatchBlocks(try_entry, *code_item_, try_item, catch_blocks);
         break;
       }
     }
@@ -361,12 +361,13 @@
       HTryBoundary* try_exit =
           new (allocator_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
       graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-      LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
+      LinkToCatchBlocks(try_exit, *code_item_, try_item, catch_blocks);
     }
   }
 }
 
 bool HBasicBlockBuilder::Build() {
+  DCHECK(code_item_ != nullptr);
   DCHECK(graph_->GetBlocks().empty());
 
   graph_->SetEntryBlock(new (allocator_) HBasicBlock(graph_, kNoDexPc));
@@ -383,6 +384,27 @@
   return true;
 }
 
+void HBasicBlockBuilder::BuildIntrinsic() {
+  DCHECK(code_item_ == nullptr);  // Intrinsic graphs are built without a code item.
+  DCHECK(graph_->GetBlocks().empty());
+
+  // Create the entry, exit and single body block; the body is stored at the fake dex_pc 0.
+  HBasicBlock* entry_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
+  HBasicBlock* exit_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
+  HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc */ kNoDexPc, /* store_dex_pc */ 0u);
+
+  // Add the blocks to the graph in entry, body, exit order and mark the entry/exit blocks.
+  graph_->AddBlock(entry_block);
+  graph_->AddBlock(body);
+  graph_->AddBlock(exit_block);
+  graph_->SetEntryBlock(entry_block);
+  graph_->SetExitBlock(exit_block);
+
+  // Connect the blocks into a straight line: entry -> body -> exit.
+  entry_block->AddSuccessor(body);
+  body->AddSuccessor(exit_block);
+}
+
 size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
   return quicken_index_for_dex_pc_.Get(dex_pc);
 }
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 79f7a7b..7d0f56d 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -28,14 +28,15 @@
  public:
   HBasicBlockBuilder(HGraph* graph,
                      const DexFile* const dex_file,
-                     const DexFile::CodeItem& code_item,
+                     const DexFile::CodeItem* code_item,
                      ScopedArenaAllocator* local_allocator)
       : allocator_(graph->GetAllocator()),
         graph_(graph),
         dex_file_(dex_file),
         code_item_(code_item),
         local_allocator_(local_allocator),
-        branch_targets_(code_item.insns_size_in_code_units_,
+        branch_targets_(code_item != nullptr ? code_item->insns_size_in_code_units_
+                                             : /* fake dex_pc=0 for intrinsic graph */ 1u,
                         nullptr,
                         local_allocator->Adapter(kArenaAllocGraphBuilder)),
         throwing_blocks_(kDefaultNumberOfThrowingBlocks,
@@ -50,6 +51,9 @@
   // exits a try block.
   bool Build();
 
+  // Creates basic blocks in `graph_` for compiling an intrinsic.
+  void BuildIntrinsic();
+
   size_t GetNumberOfBranches() const { return number_of_branches_; }
   HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
 
@@ -79,7 +83,7 @@
   HGraph* const graph_;
 
   const DexFile* const dex_file_;
-  const DexFile::CodeItem& code_item_;
+  const DexFile::CodeItem* const code_item_;  // null for intrinsic graph.
 
   ScopedArenaAllocator* const local_allocator_;
   ScopedArenaVector<HBasicBlock*> branch_targets_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 4ed1612..d73ef1f 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -24,6 +24,7 @@
 #include "data_type-inl.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_options.h"
+#include "driver/dex_compilation_unit.h"
 #include "instruction_builder.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
@@ -36,6 +37,7 @@
 namespace art {
 
 HGraphBuilder::HGraphBuilder(HGraph* graph,
+                             const DexFile::CodeItem* code_item,
                              const DexCompilationUnit* dex_compilation_unit,
                              const DexCompilationUnit* outer_compilation_unit,
                              CompilerDriver* driver,
@@ -45,7 +47,7 @@
                              VariableSizedHandleScope* handles)
     : graph_(graph),
       dex_file_(&graph->GetDexFile()),
-      code_item_(*dex_compilation_unit->GetCodeItem()),
+      code_item_(code_item),
       dex_compilation_unit_(dex_compilation_unit),
       outer_compilation_unit_(outer_compilation_unit),
       compiler_driver_(driver),
@@ -67,23 +69,21 @@
     return false;
   }
 
-  if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
+  if (compiler_options.IsHugeMethod(code_item_->insns_size_in_code_units_)) {
     VLOG(compiler) << "Skip compilation of huge method "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
-                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(compilation_stats_,
-                    MethodCompilationStat::kNotCompiledHugeMethod);
+                   << ": " << code_item_->insns_size_in_code_units_ << " code units";
+    MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
   // If it's large and contains no branches, it's likely to be machine generated initialization.
-  if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
+  if (compiler_options.IsLargeMethod(code_item_->insns_size_in_code_units_)
       && (number_of_branches == 0)) {
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
-                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
-    MaybeRecordStat(compilation_stats_,
-                    MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
+                   << ": " << code_item_->insns_size_in_code_units_ << " code units";
+    MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
 
@@ -91,12 +91,13 @@
 }
 
 GraphAnalysisResult HGraphBuilder::BuildGraph() {
+  DCHECK(code_item_ != nullptr);
   DCHECK(graph_->GetBlocks().empty());
 
-  graph_->SetNumberOfVRegs(code_item_.registers_size_);
-  graph_->SetNumberOfInVRegs(code_item_.ins_size_);
-  graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_);
-  graph_->SetHasTryCatch(code_item_.tries_size_ != 0);
+  graph_->SetNumberOfVRegs(code_item_->registers_size_);
+  graph_->SetNumberOfInVRegs(code_item_->ins_size_);
+  graph_->SetMaximumNumberOfOutVRegs(code_item_->outs_size_);
+  graph_->SetHasTryCatch(code_item_->tries_size_ != 0);
 
   // Use ScopedArenaAllocator for all local allocations.
   ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
@@ -148,4 +149,61 @@
   return ssa_builder.BuildSsa();
 }
 
+void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) {
+  DCHECK(code_item_ == nullptr);  // Intrinsic graphs are built without a code item.
+  DCHECK(graph_->GetBlocks().empty());
+
+  // Count arguments and their vregs from the method shorty ('J' and 'D' take two vregs each).
+  uint32_t method_idx = dex_compilation_unit_->GetDexMethodIndex();
+  const char* shorty = dex_file_->GetMethodShorty(dex_file_->GetMethodId(method_idx));
+  size_t num_args = strlen(shorty + 1);
+  size_t num_wide_args = std::count(shorty + 1, shorty + 1 + num_args, 'J') +
+                         std::count(shorty + 1, shorty + 1 + num_args, 'D');
+  size_t num_arg_vregs = num_args + num_wide_args + (dex_compilation_unit_->IsStatic() ? 0u : 1u);
+
+  // For simplicity, reserve 2 vregs (the maximum) for return value regardless of the return type.
+  size_t return_vregs = 2u;
+  graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs);
+  graph_->SetNumberOfInVRegs(num_arg_vregs);
+  graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs);  // Outs mirror the ins.
+  graph_->SetHasTryCatch(false);
+
+  // Use ScopedArenaAllocator for all local allocations.
+  ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
+  HBasicBlockBuilder block_builder(graph_, dex_file_, /* code_item */ nullptr, &local_allocator);
+  SsaBuilder ssa_builder(graph_,
+                         dex_compilation_unit_->GetClassLoader(),
+                         dex_compilation_unit_->GetDexCache(),
+                         handles_,
+                         &local_allocator);
+  HInstructionBuilder instruction_builder(graph_,
+                                          &block_builder,
+                                          &ssa_builder,
+                                          dex_file_,
+                                          /* code_item */ nullptr,
+                                          return_type_,
+                                          dex_compilation_unit_,
+                                          outer_compilation_unit_,
+                                          compiler_driver_,
+                                          code_generator_,
+                                          interpreter_metadata_,
+                                          compilation_stats_,
+                                          handles_,
+                                          &local_allocator);
+
+  // 1) Create basic blocks for the intrinsic and link them together.
+  block_builder.BuildIntrinsic();
+
+  // 2) Build the trivial dominator tree.
+  GraphAnalysisResult bdt_result = graph_->BuildDominatorTree();
+  DCHECK_EQ(bdt_result, kAnalysisSuccess);
+
+  // 3) Populate basic blocks with instructions for the intrinsic.
+  instruction_builder.BuildIntrinsic(method);
+
+  // 4) Type the graph (no dead/redundant phis to eliminate).
+  GraphAnalysisResult build_ssa_result = ssa_builder.BuildSsa();
+  DCHECK_EQ(build_ssa_result, kAnalysisSuccess);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 5a860f1..0bb3a05 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -21,17 +21,19 @@
 #include "dex_file-inl.h"
 #include "dex_file.h"
 #include "driver/compiler_driver.h"
-#include "driver/dex_compilation_unit.h"
 #include "nodes.h"
 
 namespace art {
 
+class ArtMethod;
 class CodeGenerator;
+class DexCompilationUnit;
 class OptimizingCompilerStats;
 
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(HGraph* graph,
+                const DexFile::CodeItem* code_item,
                 const DexCompilationUnit* dex_compilation_unit,
                 const DexCompilationUnit* outer_compilation_unit,
                 CompilerDriver* driver,
@@ -47,8 +49,8 @@
                 VariableSizedHandleScope* handles,
                 DataType::Type return_type = DataType::Type::kInt32)
       : graph_(graph),
-        dex_file_(dex_compilation_unit->GetDexFile()),
-        code_item_(code_item),
+        dex_file_(&graph->GetDexFile()),
+        code_item_(&code_item),
         dex_compilation_unit_(dex_compilation_unit),
         outer_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
@@ -59,6 +61,7 @@
         return_type_(return_type) {}
 
   GraphAnalysisResult BuildGraph();
+  void BuildIntrinsicGraph(ArtMethod* method);
 
   static constexpr const char* kBuilderPassName = "builder";
 
@@ -67,7 +70,7 @@
 
   HGraph* const graph_;
   const DexFile* const dex_file_;
-  const DexFile::CodeItem& code_item_;
+  const DexFile::CodeItem* const code_item_;  // null for intrinsic graph.
 
   // The compilation unit of the current method being compiled. Note that
   // it can be an inlined method.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 015a6a0..0bd3ce9 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -946,12 +946,12 @@
 
 void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region,
                                    MemoryRegion method_info_region,
-                                   const DexFile::CodeItem& code_item) {
+                                   const DexFile::CodeItem* code_item_for_osr_check) {
   StackMapStream* stack_map_stream = GetStackMapStream();
   stack_map_stream->FillInCodeInfo(stack_map_region);
   stack_map_stream->FillInMethodInfo(method_info_region);
-  if (kIsDebugBuild) {
-    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), code_item);
+  if (kIsDebugBuild && code_item_for_osr_check != nullptr) {
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), *code_item_for_osr_check);
   }
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 18ad60d..08e4462 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -346,7 +346,7 @@
 
   void BuildStackMaps(MemoryRegion stack_map_region,
                       MemoryRegion method_info_region,
-                      const DexFile::CodeItem& code_item);
+                      const DexFile::CodeItem* code_item_for_osr_check);
   void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
   size_t GetNumberOfJitRoots() const;
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 7adb196..3f4a3d8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1667,6 +1667,7 @@
     }
   }
   HGraphBuilder builder(callee_graph,
+                        code_item,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
                         compiler_driver_,
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 8e9b818..61840cc 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -272,6 +272,7 @@
 }
 
 bool HInstructionBuilder::Build() {
+  DCHECK(code_item_ != nullptr);
   locals_for_.resize(
       graph_->GetBlocks().size(),
       ScopedArenaVector<HInstruction*>(local_allocator_->Adapter(kArenaAllocGraphBuilder)));
@@ -321,7 +322,7 @@
       quicken_index = block_builder_->GetQuickenIndex(block_dex_pc);
     }
 
-    for (const DexInstructionPcPair& pair : code_item_.Instructions(block_dex_pc)) {
+    for (const DexInstructionPcPair& pair : code_item_->Instructions(block_dex_pc)) {
       if (current_block_ == nullptr) {
         // The previous instruction ended this block.
         break;
@@ -364,6 +365,73 @@
   return true;
 }
 
+void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
+  DCHECK(code_item_ == nullptr);  // Intrinsic graphs are built without a code item.
+  DCHECK(method->IsIntrinsic());
+
+  locals_for_.resize(
+      graph_->GetBlocks().size(),
+      ScopedArenaVector<HInstruction*>(local_allocator_->Adapter(kArenaAllocGraphBuilder)));
+
+  // Fill the entry block. Do not add suspend check, we do not want a suspend
+  // check in intrinsics; intrinsic methods are supposed to be fast.
+  current_block_ = graph_->GetEntryBlock();
+  InitializeBlockLocals();
+  InitializeParameters();
+  AppendInstruction(new (allocator_) HGoto(0u));
+
+  // Fill the body.
+  current_block_ = current_block_->GetSingleSuccessor();
+  InitializeBlockLocals();
+  DCHECK(!IsBlockPopulated(current_block_));
+
+  // Add the invoke instruction. Use HInvokeStaticOrDirect even
+  // for methods that would normally use an HInvokeVirtual (sharpen the call).
+  size_t in_vregs = graph_->GetNumberOfInVRegs();  // Null in-locals are not arguments.
+  size_t number_of_arguments =
+      in_vregs - std::count(current_locals_->end() - in_vregs, current_locals_->end(), nullptr);
+  uint32_t method_idx = dex_compilation_unit_->GetDexMethodIndex();
+  MethodReference target_method(dex_file_, method_idx);
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+      HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall,
+      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+      /* method_load_data */ 0u
+  };
+  InvokeType invoke_type = dex_compilation_unit_->IsStatic() ? kStatic : kDirect;
+  HInvokeStaticOrDirect* invoke = new (allocator_) HInvokeStaticOrDirect(
+      allocator_,
+      number_of_arguments,
+      return_type_,
+      kNoDexPc,
+      method_idx,
+      method,
+      dispatch_info,
+      invoke_type,
+      target_method,
+      HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+  HandleInvoke(invoke,
+               in_vregs,
+               /* args */ nullptr,
+               graph_->GetNumberOfVRegs() - in_vregs,
+               /* is_range */ true,
+               dex_file_->GetMethodShorty(method_idx),
+               /* clinit_check */ nullptr,
+               /* is_unresolved */ false);
+
+  // Add the return instruction, returning the invoke's result unless the method is void.
+  if (return_type_ == DataType::Type::kVoid) {
+    AppendInstruction(new (allocator_) HReturnVoid());
+  } else {
+    AppendInstruction(new (allocator_) HReturn(invoke));
+  }
+
+  // Fill the exit block.
+  DCHECK_EQ(current_block_->GetSingleSuccessor(), graph_->GetExitBlock());
+  current_block_ = graph_->GetExitBlock();
+  InitializeBlockLocals();
+  AppendInstruction(new (allocator_) HExit());
+}
+
 ArenaBitVector* HInstructionBuilder::FindNativeDebugInfoLocations() {
   // The callback gets called when the line number changes.
   // In other words, it marks the start of new java statement.
@@ -373,15 +441,15 @@
       return false;
     }
   };
-  const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
+  const uint32_t num_instructions = code_item_->insns_size_in_code_units_;
   ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_,
                                                      num_instructions,
                                                      /* expandable */ false,
                                                      kArenaAllocGraphBuilder);
   locations->ClearAllBits();
-  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
+  dex_file_->DecodeDebugPositionInfo(code_item_, Callback::Position, locations);
   // Instruction-specific tweaks.
-  IterationRange<DexInstructionIterator> instructions = code_item_.Instructions();
+  IterationRange<DexInstructionIterator> instructions = code_item_->Instructions();
   for (const DexInstructionPcPair& inst : instructions) {
     switch (inst->Opcode()) {
       case Instruction::MOVE_EXCEPTION: {
@@ -1641,7 +1709,7 @@
 
   int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
   const Instruction::ArrayDataPayload* payload =
-      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset);
+      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_->insns_ + payload_offset);
   const uint8_t* data = payload->data;
   uint32_t element_count = payload->element_count;
 
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 058b711..f551ac4 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -50,7 +50,7 @@
                       HBasicBlockBuilder* block_builder,
                       SsaBuilder* ssa_builder,
                       const DexFile* dex_file,
-                      const DexFile::CodeItem& code_item,
+                      const DexFile::CodeItem* code_item,
                       DataType::Type return_type,
                       const DexCompilationUnit* dex_compilation_unit,
                       const DexCompilationUnit* outer_compilation_unit,
@@ -85,6 +85,7 @@
   }
 
   bool Build();
+  void BuildIntrinsic(ArtMethod* method);
 
  private:
   void InitializeBlockLocals();
@@ -327,7 +328,7 @@
 
   // The dex file where the method being compiled is, and the bytecode data.
   const DexFile* const dex_file_;
-  const DexFile::CodeItem& code_item_;
+  const DexFile::CodeItem* const code_item_;  // null for intrinsic graph.
 
   // The return type of the method being compiled.
   const DataType::Type return_type_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2bba985..4974ed0 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -311,12 +311,8 @@
 
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
-                             const DexFile& dex_file) const OVERRIDE {
-    return ArtQuickJniCompileMethod(GetCompilerDriver(),
-                                    access_flags,
-                                    method_idx,
-                                    dex_file);
-  }
+                             const DexFile& dex_file,
+                             Handle<mirror::DexCache> dex_cache) const OVERRIDE;
 
   uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -366,18 +362,18 @@
   CodeGenerator* TryCompile(ArenaAllocator* allocator,
                             ArenaStack* arena_stack,
                             CodeVectorAllocator* code_allocator,
-                            const DexFile::CodeItem* code_item,
-                            uint32_t access_flags,
-                            InvokeType invoke_type,
-                            uint16_t class_def_idx,
-                            uint32_t method_idx,
-                            Handle<mirror::ClassLoader> class_loader,
-                            const DexFile& dex_file,
-                            Handle<mirror::DexCache> dex_cache,
+                            const DexCompilationUnit& dex_compilation_unit,
                             ArtMethod* method,
                             bool osr,
                             VariableSizedHandleScope* handles) const;
 
+  CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator,  // Null if nothing was compiled.
+                                     ArenaStack* arena_stack,
+                                     CodeVectorAllocator* code_allocator,
+                                     const DexCompilationUnit& dex_compilation_unit,
+                                     ArtMethod* method,  // The intrinsic method; must be non-null.
+                                     VariableSizedHandleScope* handles) const;
+
   void MaybeRunInliner(HGraph* graph,
                        CodeGenerator* codegen,
                        CompilerDriver* driver,
@@ -790,7 +786,7 @@
         driver,
         dex_compilation_unit,
         handles);
-    RunOptimizations(&optimizations[0], optimizations.size(), pass_observer);
+    RunOptimizations(optimizations.data(), optimizations.size(), pass_observer);
     return;
   }
 
@@ -895,7 +891,7 @@
                                          CodeVectorAllocator* code_allocator,
                                          CodeGenerator* codegen,
                                          CompilerDriver* compiler_driver,
-                                         const DexFile::CodeItem* code_item) const {
+                                         const DexFile::CodeItem* code_item_for_osr_check) const {
   ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(allocator->Adapter(kArenaAllocStackMaps));
   ArenaVector<uint8_t> method_info(allocator->Adapter(kArenaAllocStackMaps));
@@ -906,7 +902,7 @@
   method_info.resize(method_info_size);
   codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()),
                           MemoryRegion(method_info.data(), method_info.size()),
-                          *code_item);
+                          code_item_for_osr_check);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -929,21 +925,16 @@
 CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
                                               ArenaStack* arena_stack,
                                               CodeVectorAllocator* code_allocator,
-                                              const DexFile::CodeItem* code_item,
-                                              uint32_t access_flags,
-                                              InvokeType invoke_type,
-                                              uint16_t class_def_idx,
-                                              uint32_t method_idx,
-                                              Handle<mirror::ClassLoader> class_loader,
-                                              const DexFile& dex_file,
-                                              Handle<mirror::DexCache> dex_cache,
+                                              const DexCompilationUnit& dex_compilation_unit,
                                               ArtMethod* method,
                                               bool osr,
                                               VariableSizedHandleScope* handles) const {
-  MaybeRecordStat(compilation_stats_.get(),
-                  MethodCompilationStat::kAttemptCompilation);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+  const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
+  uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex();
+  const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem();
 
   // Always use the Thumb-2 assembler: some runtime functionality
   // (like implicit stack overflow checks) assume Thumb-2.
@@ -973,18 +964,6 @@
     return nullptr;
   }
 
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  DexCompilationUnit dex_compilation_unit(
-      class_loader,
-      class_linker,
-      dex_file,
-      code_item,
-      class_def_idx,
-      method_idx,
-      access_flags,
-      /* verified_method */ nullptr,
-      dex_cache);
-
   HGraph* graph = new (allocator) HGraph(
       allocator,
       arena_stack,
@@ -996,11 +975,6 @@
       osr);
 
   const uint8_t* interpreter_metadata = nullptr;
-  if (method == nullptr) {
-    ScopedObjectAccess soa(Thread::Current());
-    method = compiler_driver->ResolveMethod(
-        soa, dex_cache, class_loader, &dex_compilation_unit, method_idx, invoke_type);
-  }
   // For AOT compilation, we may not get a method, for example if its class is erroneous.
   // JIT should always have a method.
   DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
@@ -1034,6 +1008,7 @@
     VLOG(compiler) << "Building " << pass_observer.GetMethodName();
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
     HGraphBuilder builder(graph,
+                          code_item,
                           &dex_compilation_unit,
                           &dex_compilation_unit,
                           compiler_driver,
@@ -1093,6 +1068,112 @@
   return codegen.release();
 }
 
+CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(  // Compiles `method` from an intrinsic graph.
+    ArenaAllocator* allocator,
+    ArenaStack* arena_stack,
+    CodeVectorAllocator* code_allocator,
+    const DexCompilationUnit& dex_compilation_unit,
+    ArtMethod* method,
+    VariableSizedHandleScope* handles) const {  // Returns null if no code was generated.
+  CompilerDriver* compiler_driver = GetCompilerDriver();
+  InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+  const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
+  uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex();
+
+  // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
+  DCHECK_NE(instruction_set, InstructionSet::kArm);
+
+  // Do not attempt to compile on architectures we do not support.
+  if (!IsInstructionSetSupported(instruction_set)) {
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledUnsupportedIsa);
+    return nullptr;
+  }
+
+  HGraph* graph = new (allocator) HGraph(
+      allocator,
+      arena_stack,
+      dex_file,
+      method_idx,
+      compiler_driver->GetInstructionSet(),
+      kInvalidInvokeType,
+      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      /* osr */ false);
+
+  DCHECK(Runtime::Current()->IsAotCompiler());  // This function expects to run in the AOT compiler.
+  DCHECK(method != nullptr);
+  graph->SetArtMethod(method);
+
+  std::unique_ptr<CodeGenerator> codegen(
+      CodeGenerator::Create(graph,
+                            instruction_set,
+                            *compiler_driver->GetInstructionSetFeatures(),
+                            compiler_driver->GetCompilerOptions(),
+                            compilation_stats_.get()));
+  if (codegen.get() == nullptr) {
+    MaybeRecordStat(compilation_stats_.get(),
+                    MethodCompilationStat::kNotCompiledNoCodegen);
+    return nullptr;
+  }
+  codegen->GetAssembler()->cfi().SetEnabled(
+      compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo());
+
+  PassObserver pass_observer(graph,
+                             codegen.get(),
+                             visualizer_output_.get(),
+                             compiler_driver,
+                             dump_mutex_);
+
+  {
+    VLOG(compiler) << "Building intrinsic graph " << pass_observer.GetMethodName();
+    PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
+    HGraphBuilder builder(graph,
+                          /* code_item */ nullptr,
+                          &dex_compilation_unit,
+                          &dex_compilation_unit,
+                          compiler_driver,
+                          codegen.get(),
+                          compilation_stats_.get(),
+                          /* interpreter_metadata */ nullptr,
+                          handles);
+    builder.BuildIntrinsicGraph(method);  // Built from `method`; no dex code item is supplied.
+  }
+
+  OptimizingCompilerStats* stats = compilation_stats_.get();
+  InstructionSimplifier* simplify = new (allocator) InstructionSimplifier(
+      graph, codegen.get(), compiler_driver, stats);
+  IntrinsicsRecognizer* intrinsics = new (allocator) IntrinsicsRecognizer(graph, stats);
+
+  HOptimization* optimizations[] = {
+      intrinsics,
+      // Some intrinsics are converted to HIR by the simplifier and the codegen also
+      // has a few assumptions that only the instruction simplifier can satisfy.
+      simplify,
+  };
+  RunOptimizations(optimizations, arraysize(optimizations), &pass_observer);
+
+  RunArchOptimizations(compiler_driver->GetInstructionSet(), graph, codegen.get(), &pass_observer);
+
+  AllocateRegisters(graph,
+                    codegen.get(),
+                    &pass_observer,
+                    compiler_driver->GetCompilerOptions().GetRegisterAllocationStrategy(),
+                    compilation_stats_.get());
+  if (!codegen->IsLeafMethod()) {  // Only a leaf result is accepted; otherwise report failure.
+    VLOG(compiler) << "Intrinsic method is not leaf: " << method->GetIntrinsic()
+        << " " << graph->PrettyMethod();
+    return nullptr;
+  }
+
+  codegen->Compile(code_allocator);
+  pass_observer.DumpDisassembly();
+
+  VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic()
+      << " " << graph->PrettyMethod();
+  return codegen.release();  // The caller takes ownership of the code generator.
+}
+
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
@@ -1102,42 +1183,71 @@
                                             const DexFile& dex_file,
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
-  CompiledMethod* method = nullptr;
-  DCHECK(Runtime::Current()->IsAotCompiler());
+  CompiledMethod* compiled_method = nullptr;
+  Runtime* runtime = Runtime::Current();
+  DCHECK(runtime->IsAotCompiler());
   const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
   DCHECK(!verified_method->HasRuntimeThrow());
   if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
       verifier::CanCompilerHandleVerificationFailure(
           verified_method->GetEncounteredVerificationFailures())) {
-    ArenaAllocator allocator(Runtime::Current()->GetArenaPool());
-    ArenaStack arena_stack(Runtime::Current()->GetArenaPool());
+    ArenaAllocator allocator(runtime->GetArenaPool());
+    ArenaStack arena_stack(runtime->GetArenaPool());
     CodeVectorAllocator code_allocator(&allocator);
     std::unique_ptr<CodeGenerator> codegen;
+    bool compiled_intrinsic = false;
     {
+      DexCompilationUnit dex_compilation_unit(
+          jclass_loader,
+          runtime->GetClassLinker(),
+          dex_file,
+          code_item,
+          class_def_idx,
+          method_idx,
+          access_flags,
+          /* verified_method */ nullptr,  // Not needed by the Optimizing compiler.
+          dex_cache);
       ScopedObjectAccess soa(Thread::Current());
+      ArtMethod* method = compiler_driver->ResolveMethod(
+            soa, dex_cache, jclass_loader, &dex_compilation_unit, method_idx, invoke_type);
       VariableSizedHandleScope handles(soa.Self());
       // Go to native so that we don't block GC during compilation.
       ScopedThreadSuspension sts(soa.Self(), kNative);
-      codegen.reset(
-          TryCompile(&allocator,
-                     &arena_stack,
-                     &code_allocator,
-                     code_item,
-                     access_flags,
-                     invoke_type,
-                     class_def_idx,
-                     method_idx,
-                     jclass_loader,
-                     dex_file,
-                     dex_cache,
-                     nullptr,
-                     /* osr */ false,
-                     &handles));
+      if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
+        DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+        codegen.reset(
+            TryCompileIntrinsic(&allocator,
+                                &arena_stack,
+                                &code_allocator,
+                                dex_compilation_unit,
+                                method,
+                                &handles));
+        if (codegen != nullptr) {
+          compiled_intrinsic = true;
+        }
+      }
+      if (codegen == nullptr) {
+        codegen.reset(
+            TryCompile(&allocator,
+                       &arena_stack,
+                       &code_allocator,
+                       dex_compilation_unit,
+                       method,
+                       /* osr */ false,
+                       &handles));
+      }
     }
     if (codegen.get() != nullptr) {
       MaybeRecordStat(compilation_stats_.get(),
                       MethodCompilationStat::kCompiled);
-      method = Emit(&allocator, &code_allocator, codegen.get(), compiler_driver, code_item);
+      compiled_method = Emit(&allocator,
+                             &code_allocator,
+                             codegen.get(),
+                             compiler_driver,
+                             compiled_intrinsic ? nullptr : code_item);
+      if (compiled_intrinsic) {
+        compiled_method->MarkAsIntrinsic();
+      }
 
       if (kArenaAllocatorCountAllocations) {
         codegen.reset();  // Release codegen's ScopedArenaAllocator for memory accounting.
@@ -1171,10 +1281,62 @@
     // regressing.
     std::string method_name = dex_file.PrettyMethod(method_idx);
     bool shouldCompile = method_name.find("$opt$") != std::string::npos;
-    DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
+    DCHECK((compiled_method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
   }
 
-  return method;
+  return compiled_method;
+}
+
+CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,  // Tries intrinsic code first.
+                                               uint32_t method_idx,
+                                               const DexFile& dex_file,
+                                               Handle<mirror::DexCache> dex_cache) const {
+  if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) {  // Intrinsic path: boot image only.
+    ScopedObjectAccess soa(Thread::Current());
+    Runtime* runtime = Runtime::Current();
+    ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod(
+        method_idx, dex_cache.Get(), /* class_loader */ nullptr);
+    if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
+      ScopedNullHandle<mirror::ClassLoader> class_loader;  // null means boot class path loader.
+      DexCompilationUnit dex_compilation_unit(
+          class_loader,
+          runtime->GetClassLinker(),
+          dex_file,
+          /* code_item */ nullptr,
+          /* class_def_idx */ DexFile::kDexNoIndex16,
+          method_idx,
+          access_flags,
+          /* verified_method */ nullptr,
+          dex_cache);
+      ArenaAllocator allocator(runtime->GetArenaPool());
+      ArenaStack arena_stack(runtime->GetArenaPool());
+      CodeVectorAllocator code_allocator(&allocator);
+      VariableSizedHandleScope handles(soa.Self());
+      // Go to native so that we don't block GC during compilation.
+      ScopedThreadSuspension sts(soa.Self(), kNative);
+      std::unique_ptr<CodeGenerator> codegen(
+          TryCompileIntrinsic(&allocator,
+                              &arena_stack,
+                              &code_allocator,
+                              dex_compilation_unit,
+                              method,
+                              &handles));
+      if (codegen != nullptr) {  // Intrinsic codegen succeeded: emit it and skip the JNI stub.
+        CompiledMethod* compiled_method = Emit(&allocator,
+                                               &code_allocator,
+                                               codegen.get(),
+                                               GetCompilerDriver(),
+                                               /* code_item_for_osr_check */ nullptr);
+        compiled_method->MarkAsIntrinsic();
+        return compiled_method;
+      }
+    }
+  }
+  // Fallback when no intrinsic code was produced above: use the quick JNI compiler.
+  return ArtQuickJniCompileMethod(GetCompilerDriver(),
+                                  access_flags,
+                                  method_idx,
+                                  dex_file);
 }
 
 Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
@@ -1221,29 +1383,33 @@
   const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   const uint32_t method_idx = method->GetDexMethodIndex();
   const uint32_t access_flags = method->GetAccessFlags();
-  const InvokeType invoke_type = method->GetInvokeType();
 
-  ArenaAllocator allocator(Runtime::Current()->GetJitArenaPool());
+  Runtime* runtime = Runtime::Current();
+  ArenaAllocator allocator(runtime->GetJitArenaPool());
   ArenaStack arena_stack(Runtime::Current()->GetJitArenaPool());
   CodeVectorAllocator code_allocator(&allocator);
   VariableSizedHandleScope handles(self);
 
   std::unique_ptr<CodeGenerator> codegen;
   {
+    DexCompilationUnit dex_compilation_unit(
+        class_loader,
+        runtime->GetClassLinker(),
+        *dex_file,
+        code_item,
+        class_def_idx,
+        method_idx,
+        access_flags,
+        /* verified_method */ nullptr,
+        dex_cache);
+
     // Go to native so that we don't block GC during compilation.
     ScopedThreadSuspension sts(self, kNative);
     codegen.reset(
         TryCompile(&allocator,
                    &arena_stack,
                    &code_allocator,
-                   code_item,
-                   access_flags,
-                   invoke_type,
-                   class_def_idx,
-                   method_idx,
-                   class_loader,
-                   *dex_file,
-                   dex_cache,
+                   dex_compilation_unit,
                    method,
                    osr,
                    &handles));
@@ -1286,7 +1452,7 @@
   MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
                           MemoryRegion(method_info_data, method_info_size),
-                          *code_item);
+                          code_item);
   codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data);
 
   const void* code = code_cache->CommitCode(
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index e90c30d..158c252 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -22,6 +22,7 @@
 #include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
+#include "driver/dex_compilation_unit.h"
 #include "handle_scope-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
@@ -133,12 +134,11 @@
       if (handles_ == nullptr) {
         handles_.reset(new VariableSizedHandleScope(soa.Self()));
       }
-      const DexFile* dex_file = graph->GetAllocator()->Alloc<DexFile>();
       const DexCompilationUnit* dex_compilation_unit =
           new (graph->GetAllocator()) DexCompilationUnit(
               handles_->NewHandle<mirror::ClassLoader>(nullptr),
               /* class_linker */ nullptr,
-              *dex_file,
+              graph->GetDexFile(),
               code_item,
               /* class_def_index */ DexFile::kDexNoIndex16,
               /* method_idx */ dex::kDexNoIndex,
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index fe98aa9..1ed190d 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -53,16 +53,18 @@
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
   if (check->IsStringCharAt()) {
-    // Add a fake environment for String.charAt() inline info as we want
-    // the exception to appear as being thrown from there.
+    // Add a fake environment for String.charAt() inline info as we want the exception
+    // to appear as being thrown from there. Skip if we're compiling String.charAt() itself.
     ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
-    ArenaAllocator* allocator = GetGraph()->GetAllocator();
-    HEnvironment* environment = new (allocator) HEnvironment(allocator,
-                                                             /* number_of_vregs */ 0u,
-                                                             char_at_method,
-                                                             /* dex_pc */ dex::kDexNoIndex,
-                                                             check);
-    check->InsertRawEnvironment(environment);
+    if (GetGraph()->GetArtMethod() != char_at_method) {  // The graph is for some other method.
+      ArenaAllocator* allocator = GetGraph()->GetAllocator();
+      HEnvironment* environment = new (allocator) HEnvironment(allocator,
+                                                               /* number_of_vregs */ 0u,
+                                                               char_at_method,
+                                                               /* dex_pc */ dex::kDexNoIndex,
+                                                               check);
+      check->InsertRawEnvironment(environment);
+    }
   }
 }
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 9bc8045..4f43eb3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -32,7 +32,6 @@
                                         uint32_t num_dex_registers,
                                         uint8_t inlining_depth) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
-  DCHECK_NE(dex_pc, static_cast<uint32_t>(-1)) << "invalid dex_pc";
   current_entry_.dex_pc = dex_pc;
   current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
   current_entry_.register_mask = register_mask;
@@ -56,7 +55,10 @@
     number_of_stack_maps_with_inline_info_++;
   }
 
-  dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
+  // Note: dex_pc can be kNoDexPc (tested here as dex::kDexNoIndex) for native method intrinsics.
+  if (dex_pc != dex::kDexNoIndex && (dex_pc_max_ == dex::kDexNoIndex || dex_pc_max_ < dex_pc)) {
+    dex_pc_max_ = dex_pc;
+  }
   register_mask_max_ = std::max(register_mask_max_, register_mask);
   current_dex_register_ = 0;
 }
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index e126609..579aabd 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -73,7 +73,7 @@
         method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
-        dex_pc_max_(0),
+        dex_pc_max_(kNoDexPc),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),