Use ScopedArenaAllocator for code generation.

Reuse the memory previously allocated on the ArenaStack by
optimization passes.
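
For reference, a minimal sketch of the intended pattern (illustrative
only; RunPass and its body are placeholders, the ScopedArenaAllocator
and ArenaStack names follow the existing API):

  // A ScopedArenaAllocator records the current top of the ArenaStack on
  // construction and releases everything allocated through it on
  // destruction, so the next pass reuses the same arena memory.
  void RunPass(ArenaStack* arena_stack) {
    ScopedArenaAllocator allocator(arena_stack);
    // ... pass-local allocations go through `allocator` ...
  }  // Destructor rewinds the ArenaStack here.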

This CL handles only the architecture-independent codegen
and slow paths; architecture-dependent codegen allocations
will be moved to the ScopedArenaAllocator in a follow-up.
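
Concretely, slow paths are now placement-new'ed on the code
generator's scoped allocator instead of the compile-lifetime
ArenaAllocator, e.g. (excerpted from the x86 intrinsics change below;
the other call sites follow the same pattern):

  SlowPathCode* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());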

Memory needed to compile the two most expensive methods for
aosp_angler-userdebug boot image:
  BatteryStats.dumpCheckinLocked(): 19.6MiB -> 18.5MiB (-1189KiB)
  BatteryStats.dumpLocked(): 39.3MiB -> 37.0MiB (-2379KiB)

Also move definitions of functions that use bit_vector-inl.h
from bit_vector.h to bit_vector-inl.h.
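
(Illustrative shape of that split; HasAnyBitSet and the members shown
are placeholders, not the actual bit_vector functions being moved:)

  // bit_vector.h -- declaration only, keeping the header free of the
  // -inl.h include.
  class BitVector {
   public:
    bool HasAnyBitSet() const;
   private:
    uint32_t* storage_;
    uint32_t storage_size_;
  };

  // bit_vector-inl.h -- the definition moves here, next to whatever
  // inline helpers it depends on.
  inline bool BitVector::HasAnyBitSet() const {
    for (uint32_t i = 0; i != storage_size_; ++i) {
      if (storage_[i] != 0u) {
        return true;
      }
    }
    return false;
  }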

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 64312607
Change-Id: I84688c3a5a95bf90f56bd3a150bc31fedc95f29c
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 8b389ba..8a0b6ae 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1253,7 +1253,7 @@
   Register count = locations->GetTemp(2).AsRegister<Register>();
   DCHECK_EQ(count, ECX);
 
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
 
   // Bail out if the source and destination are the same (to handle overlap).
@@ -1336,7 +1336,7 @@
 
   Register argument = locations->InAt(1).AsRegister<Register>();
   __ testl(argument, argument);
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1485,7 +1485,6 @@
 static void GenerateStringIndexOf(HInvoke* invoke,
                                   X86Assembler* assembler,
                                   CodeGeneratorX86* codegen,
-                                  ArenaAllocator* allocator,
                                   bool start_at_zero) {
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1515,7 +1514,7 @@
     std::numeric_limits<uint16_t>::max()) {
       // Always needs the slow-path. We could directly dispatch to it, but this case should be
       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
-      slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
       codegen->AddSlowPath(slow_path);
       __ jmp(slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
@@ -1523,7 +1522,7 @@
     }
   } else if (code_point->GetType() != DataType::Type::kUint16) {
     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
-    slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
     codegen->AddSlowPath(slow_path);
     __ j(kAbove, slow_path->GetEntryLabel());
   }
@@ -1640,7 +1639,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1648,8 +1647,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(
-      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1669,7 +1667,7 @@
 
   Register byte_array = locations->InAt(0).AsRegister<Register>();
   __ testl(byte_array, byte_array);
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -1713,7 +1711,7 @@
 
   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
   __ testl(string_to_copy, string_to_copy);
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
@@ -2901,7 +2899,8 @@
   Location temp2_loc = locations->GetTemp(1);
   Register temp2 = temp2_loc.AsRegister<Register>();
 
-  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  SlowPathCode* intrinsic_slow_path =
+      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
   codegen_->AddSlowPath(intrinsic_slow_path);
 
   NearLabel conditions_on_positions_validated;
@@ -3215,7 +3214,7 @@
 
     // Slow path used to copy array when `src` is gray.
     SlowPathCode* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
     codegen_->AddSlowPath(read_barrier_slow_path);
 
     // We have done the "if" of the gray bit check above, now branch based on the flags.