Use iterators "before" the use node in HUserRecord<>.

Create a new template class IntrusiveForwardList<> that
mimics std::forward_list<> except that all allocations
are handled externally. This is essentially the same as
boost::intrusive::slist<>, but since we're not using Boost
we have to reinvent the wheel.

Use the new container to replace the HUseList and use
iterators to the positions "before" the use nodes in
HUserRecord<> to avoid the extra pointer to the previous
node, which was used exclusively for removing nodes from
the list. This reduces the size of the HUseListNode by 25%:
from 32B to 24B in the 64-bit compiler and from 16B to 12B
in the 32-bit compiler. This translates directly to overall
memory savings for the 64-bit compiler but, due to rounding
up of the arena allocations to 8B, we do not get any
improvement in the 32-bit compiler.

When compiling the Nexus 5 boot image with the 64-bit
dex2oat on host, this CL reduces the memory used for
compiling the most memory-hungry method,
BatteryStats.dumpLocked(), by ~3.3MiB:

Before:
  MEM: used: 47829200, allocated: 48769120, lost: 939920
  Number of arenas allocated: 345,
  Number of allocations: 815492, avg size: 58
  ...
  UseListNode    13744640
  ...
After:
  MEM: used: 44393040, allocated: 45361248, lost: 968208
  Number of arenas allocated: 319,
  Number of allocations: 815492, avg size: 54
  ...
  UseListNode    10308480
  ...

Note that while we do not ship the 64-bit dex2oat to the
device, the JIT compilation for 64-bit processes uses
the 64-bit libart-compiler.

Bug: 28173563
Change-Id: I985eabd4816f845372d8aaa825a1489cf9569208
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index ac7ed86..2de4158 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -43,31 +43,29 @@
 
     // Visit all uses to determine if this reference can spread into the heap,
     // a method call, etc.
-    for (HUseIterator<HInstruction*> use_it(reference_->GetUses());
-         !use_it.Done();
-         use_it.Advance()) {
-      HInstruction* use = use_it.Current()->GetUser();
-      DCHECK(!use->IsNullCheck()) << "NullCheck should have been eliminated";
-      if (use->IsBoundType()) {
+    for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
+      HInstruction* user = use.GetUser();
+      DCHECK(!user->IsNullCheck()) << "NullCheck should have been eliminated";
+      if (user->IsBoundType()) {
         // BoundType shouldn't normally be necessary for a NewInstance.
         // Just be conservative for the uncommon cases.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsPhi() || use->IsSelect() || use->IsInvoke() ||
-          (use->IsInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedInstanceFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
-          (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
-          (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
+      if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
+          (user->IsInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsStaticFieldSet() && (reference_ == user->InputAt(1))) ||
+          (user->IsUnresolvedStaticFieldSet() && (reference_ == user->InputAt(0))) ||
+          (user->IsArraySet() && (reference_ == user->InputAt(2)))) {
         // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
         return;
       }
-      if (use->IsReturn()) {
+      if (user->IsReturn()) {
         is_singleton_and_not_returned_ = false;
       }
     }