Use ScopedArenaAllocator for code generation.

Reuse the memory previously allocated on the ArenaStack by
optimization passes.
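
For context, the reuse pattern looks roughly like this (a minimal
sketch assuming ART's scoped arena API in base/scoped_arena_allocator.h
and base/scoped_arena_containers.h; RunPass() and the worklist are
illustrative only, not code from this CL):

  // Each pass opens a ScopedArenaAllocator "frame" on the shared
  // ArenaStack; destroying the allocator pops the frame, so the next
  // pass (and now codegen) reuses the same memory instead of growing
  // the ArenaAllocator.
  void RunPass(HGraph* graph) {
    ScopedArenaAllocator allocator(graph->GetArenaStack());
    ScopedArenaVector<HInstruction*> worklist(
        allocator.Adapter(kArenaAllocCodeGenerator));
    // ... pass-local allocations go through `allocator` ...
  }  // Frame popped here; the memory is available for reuse.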

This CL handles only the architecture-independent codegen
and slow paths; architecture-dependent codegen allocations
shall be moved to the ScopedArenaAllocator in a follow-up.
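
As the last hunk of this diff shows, slow paths are now
placement-new'ed into the codegen's scoped allocator instead of the
HGraph's ArenaAllocator (sketch; SlowPathCodeType stands in for a
concrete slow-path class):

  SlowPathCode* slow_path =
      new (codegen->GetScopedAllocator()) SlowPathCodeType(instruction);
  codegen->AddSlowPath(slow_path);

AddSlowPath() consequently no longer wraps the pointer in a
std::unique_ptr; the memory lives until the CodeGenerator (and with
it the CodeGenerationData) is destroyed.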

Memory needed to compile the two most expensive methods for
aosp_angler-userdebug boot image:
  BatteryStats.dumpCheckinLocked(): 19.6MiB -> 18.5MiB (-1189KiB)
  BatteryStats.dumpLocked(): 39.3MiB -> 37.0MiB (-2379KiB)

Also move definitions of functions that use bit_vector-inl.h
from bit_vector.h to bit_vector-inl.h.
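
That split follows the usual -inl.h pattern (sketch with a
hypothetical Foo; the actual bit_vector.h changes are not part of
this excerpt):

  // foo.h: declaration only, so including foo.h stays cheap.
  class Foo {
   public:
    bool Bar() const;  // Defined in foo-inl.h.
  };

  // foo-inl.h: definitions that themselves need other -inl.h helpers.
  #include "foo.h"
  inline bool Foo::Bar() const { return true; }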

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 64312607
Change-Id: I84688c3a5a95bf90f56bd3a150bc31fedc95f29c
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2904b71..64c88eb 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -32,7 +32,7 @@
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
 #include "stack.h"
-#include "stack_map_stream.h"
+#include "stack_map.h"
 #include "string_reference.h"
 #include "type_reference.h"
 #include "utils/label.h"
@@ -61,6 +61,7 @@
 class CodeGenerator;
 class CompilerDriver;
 class CompilerOptions;
+class StackMapStream;
 class ParallelMoveResolver;
 
 namespace linker {
@@ -190,7 +191,7 @@
                                                const InstructionSetFeatures& isa_features,
                                                const CompilerOptions& compiler_options,
                                                OptimizingCompilerStats* stats = nullptr);
-  virtual ~CodeGenerator() {}
+  virtual ~CodeGenerator();
 
   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
   HGraph* GetGraph() const { return graph_; }
@@ -338,18 +339,16 @@
   // TODO: Replace with a catch-entering instruction that records the environment.
   void RecordCatchBlockInfo();
 
-  // TODO: Avoid creating the `std::unique_ptr` here.
-  void AddSlowPath(SlowPathCode* slow_path) {
-    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
-  }
+  // Get the ScopedArenaAllocator used for codegen memory allocation.
+  ScopedArenaAllocator* GetScopedAllocator();
+
+  void AddSlowPath(SlowPathCode* slow_path);
 
   void BuildStackMaps(MemoryRegion stack_map_region,
                       MemoryRegion method_info_region,
                       const DexFile::CodeItem& code_item);
   void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
-  size_t GetNumberOfJitRoots() const {
-    return jit_string_roots_.size() + jit_class_roots_.size();
-  }
+  size_t GetNumberOfJitRoots() const;
 
   // Fills the `literals` array with literals collected during code generation.
   // Also emits literal patches.
@@ -600,38 +599,7 @@
                 uint32_t core_callee_save_mask,
                 uint32_t fpu_callee_save_mask,
                 const CompilerOptions& compiler_options,
-                OptimizingCompilerStats* stats)
-      : frame_size_(0),
-        core_spill_mask_(0),
-        fpu_spill_mask_(0),
-        first_register_slot_in_slow_path_(0),
-        allocated_registers_(RegisterSet::Empty()),
-        blocked_core_registers_(graph->GetAllocator()->AllocArray<bool>(number_of_core_registers,
-                                                                        kArenaAllocCodeGenerator)),
-        blocked_fpu_registers_(graph->GetAllocator()->AllocArray<bool>(number_of_fpu_registers,
-                                                                       kArenaAllocCodeGenerator)),
-        number_of_core_registers_(number_of_core_registers),
-        number_of_fpu_registers_(number_of_fpu_registers),
-        number_of_register_pairs_(number_of_register_pairs),
-        core_callee_save_mask_(core_callee_save_mask),
-        fpu_callee_save_mask_(fpu_callee_save_mask),
-        stack_map_stream_(graph->GetAllocator(), graph->GetInstructionSet()),
-        block_order_(nullptr),
-        jit_string_roots_(StringReferenceValueComparator(),
-                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        jit_class_roots_(TypeReferenceValueComparator(),
-                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        disasm_info_(nullptr),
-        stats_(stats),
-        graph_(graph),
-        compiler_options_(compiler_options),
-        slow_paths_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        current_slow_path_(nullptr),
-        current_block_index_(0),
-        is_leaf_(true),
-        requires_current_method_(false) {
-    slow_paths_.reserve(8);
-  }
+                OptimizingCompilerStats* stats);
 
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
@@ -687,12 +655,15 @@
     return current_slow_path_;
   }
 
+  StackMapStream* GetStackMapStream();
+
+  void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
+  uint64_t GetJitStringRootIndex(StringReference string_reference);
+  void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
+  uint64_t GetJitClassRootIndex(TypeReference type_reference);
+
   // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
-  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
-                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
-    DCHECK_EQ(jit_string_roots_.size(), 0u);
-    DCHECK_EQ(jit_class_roots_.size(), 0u);
-  }
+  virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);
 
   // Frame size required for this method.
   uint32_t frame_size_;
@@ -714,24 +685,15 @@
   const uint32_t core_callee_save_mask_;
   const uint32_t fpu_callee_save_mask_;
 
-  StackMapStream stack_map_stream_;
-
   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;
 
-  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
-  // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
-  // will compute all the indices.
-  ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
-
-  // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
-  // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
-  // will compute all the indices.
-  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
-
   DisassemblyInformation* disasm_info_;
 
  private:
+  class CodeGenerationData;
+
+  void InitializeCodeGenerationData();
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
   void BlockIfInRegister(Location location, bool is_out = false) const;
@@ -742,8 +704,6 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
-
   // The current slow-path that we're generating code for.
   SlowPathCode* current_slow_path_;
 
@@ -759,6 +719,12 @@
   // needs the environment including a valid stack frame.
   bool requires_current_method_;
 
+  // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
+  // ArenaStack memory allocated in previous passes instead of adding to the memory
+  // held by the ArenaAllocator. This ScopedArenaAllocator is created in
+  // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
+  std::unique_ptr<CodeGenerationData> code_generation_data_;
+
   friend class OptimizingCFITest;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
@@ -863,7 +829,8 @@
                                 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
     }
     // Cannot share: create and add new slow-path for this particular dex-pc.
-    SlowPathCodeType* slow_path = new (graph_->GetAllocator()) SlowPathCodeType(instruction);
+    SlowPathCodeType* slow_path =
+        new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
     iter->second.emplace_back(std::make_pair(instruction, slow_path));
     codegen_->AddSlowPath(slow_path);
     return slow_path;