Rewrite BitVector index iterator.

The BitVector::Iterator was not iterating over the bits but
rather over indexes of the set bits. Therefore, we rename it
to IndexIterator and provide a BitVector::Indexes() to get
a container-style interface with begin() and end() for range
based for loops.

Also, simplify InsertPhiNodes where the tmp_blocks isn't
needed since the phi_nodes and input_blocks cannot lose any
blocks in subsequent iterations, so we can do the Union()
directly in those bit vectors and we need to repeat the loop
only if we have new input_blocks, rather than on phi_nodes
change. And move the temporary bit vectors to scoped arena.

Change-Id: I6cb87a2f60724eeef67c6aaa34b36ed5acde6d43
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 274678a..021392c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -61,7 +61,6 @@
 	dex/mir_optimization.cc \
 	dex/bb_optimizations.cc \
 	dex/pass_driver_me.cc \
-	dex/bit_vector_block_iterator.cc \
 	dex/frontend.cc \
 	dex/mir_graph.cc \
 	dex/mir_analysis.cc \
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index 1852f80..8b5eba0 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -38,6 +38,13 @@
 /*
  * SSATransformation pass implementation start.
  */
+void SSATransformation::Start(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
+  cUnit->mir_graph->SSATransformationStart();
+}
+
 bool SSATransformation::Worker(const PassDataHolder* data) const {
   DCHECK(data != nullptr);
   const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
@@ -54,10 +61,7 @@
   DCHECK(data != nullptr);
   CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
   DCHECK(cUnit != nullptr);
-  // Verify the dataflow information after the pass.
-  if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
-    cUnit->mir_graph->VerifyDataflow();
-  }
+  cUnit->mir_graph->SSATransformationEnd();
 }
 
 /*
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 43dcdf4..3a529f2 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -143,12 +143,7 @@
 
   bool Worker(const PassDataHolder* data) const;
 
-  void Start(const PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(cUnit != nullptr);
-    cUnit->mir_graph->InitializeSSATransformation();
-  }
+  void Start(const PassDataHolder* data) const;
 
   void End(const PassDataHolder* data) const;
 };
diff --git a/compiler/dex/bit_vector_block_iterator.cc b/compiler/dex/bit_vector_block_iterator.cc
deleted file mode 100644
index 32d7d71..0000000
--- a/compiler/dex/bit_vector_block_iterator.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bit_vector_block_iterator.h"
-#include "mir_graph.h"
-
-namespace art {
-
-BasicBlock* BitVectorBlockIterator::Next() {
-  int idx = internal_iterator_.Next();
-
-  if (idx == -1) {
-    return nullptr;
-  }
-
-  return mir_graph_->GetBasicBlock(idx);
-}
-
-}  // namespace art
diff --git a/compiler/dex/bit_vector_block_iterator.h b/compiler/dex/bit_vector_block_iterator.h
deleted file mode 100644
index 0f1c2b6..0000000
--- a/compiler/dex/bit_vector_block_iterator.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
-#define ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
-
-#include "base/bit_vector.h"
-#include "compiler_enums.h"
-#include "utils/arena_bit_vector.h"
-#include "utils/arena_allocator.h"
-#include "compiler_ir.h"
-
-namespace art {
-
-class MIRGraph;
-
-/**
- * @class BasicBlockIterator
- * @brief Helper class to get the BasicBlocks when iterating through the ArenaBitVector.
- */
-class BitVectorBlockIterator {
-  public:
-    explicit BitVectorBlockIterator(BitVector* bv, MIRGraph* mir_graph)
-      : mir_graph_(mir_graph),
-        internal_iterator_(bv) {}
-
-    explicit BitVectorBlockIterator(BitVector* bv, CompilationUnit* c_unit)
-      : mir_graph_(c_unit->mir_graph.get()),
-        internal_iterator_(bv) {}
-
-    BasicBlock* Next();
-
-    void* operator new(size_t size, ArenaAllocator* arena) {
-      return arena->Alloc(size, kArenaAllocGrowableArray);
-    };
-    void operator delete(void* p) {}  // Nop.
-
-  private:
-    MIRGraph* const mir_graph_;
-    BitVector::Iterator internal_iterator_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_BIT_VECTOR_BLOCK_ITERATOR_H_
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index c34a9f5..0ef66d2 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -80,7 +80,6 @@
       topological_order_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
-      temp_dalvik_register_v_(NULL),
       temp_scoped_alloc_(),
       temp_insn_data_(nullptr),
       temp_bit_vector_size_(0u),
@@ -1318,7 +1317,13 @@
   }
 }
 
-void MIRGraph::InitializeSSATransformation() {
+void MIRGraph::SSATransformationStart() {
+  DCHECK(temp_scoped_alloc_.get() == nullptr);
+  temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
+  temp_bit_vector_size_ = cu_->num_dalvik_registers;
+  temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV);
+
   /* Compute the DFS order */
   ComputeDFSOrders();
 
@@ -1339,6 +1344,18 @@
   DoDFSPreOrderSSARename(GetEntryBlock());
 }
 
+void MIRGraph::SSATransformationEnd() {
+  // Verify the dataflow information after the pass.
+  if (cu_->enable_debug & (1 << kDebugVerifyDataflow)) {
+    VerifyDataflow();
+  }
+
+  temp_bit_vector_size_ = 0u;
+  temp_bit_vector_ = nullptr;
+  DCHECK(temp_scoped_alloc_.get() != nullptr);
+  temp_scoped_alloc_.reset();
+}
+
 void MIRGraph::ComputeTopologicalSortOrder() {
   std::queue<BasicBlock *> q;
   std::map<int, int> visited_cnt_values;
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 3a00a43..c97dcd6 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -890,7 +890,7 @@
   /**
    * @brief Perform the initial preparation for the SSA Transformation.
    */
-  void InitializeSSATransformation();
+  void SSATransformationStart();
 
   /**
    * @brief Insert a the operands for the Phi nodes.
@@ -900,6 +900,11 @@
   bool InsertPhiNodeOperands(BasicBlock* bb);
 
   /**
+   * @brief Perform the cleanup after the SSA Transformation.
+   */
+  void SSATransformationEnd();
+
+  /**
    * @brief Perform constant propagation on a BasicBlock.
    * @param bb the considered BasicBlock.
    */
@@ -1012,7 +1017,6 @@
   GrowableArray<BasicBlockId>* topological_order_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
-  ArenaBitVector* temp_dalvik_register_v_;
   std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_;
   uint16_t* temp_insn_data_;
   uint32_t temp_bit_vector_size_;
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 865311b..6f47b8f 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "bit_vector_block_iterator.h"
 #include "compiler_internals.h"
 #include "dataflow_iterator-inl.h"
 
@@ -127,12 +126,7 @@
     return false;
   }
 
-  ArenaBitVector::Iterator iterator(bb->data_flow_info->def_v);
-  while (true) {
-    int idx = iterator.Next();
-    if (idx == -1) {
-      break;
-    }
+  for (uint32_t idx : bb->data_flow_info->def_v->Indexes()) {
     /* Block bb defines register idx */
     def_block_matrix_[idx]->SetBit(bb->id);
   }
@@ -182,22 +176,22 @@
     dom_post_order_traversal_->Reset();
   }
   ClearAllVisitedFlags();
-  std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*>> work_stack;
+  std::vector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack;
   bb->visited = true;
-  work_stack.push_back(std::make_pair(bb, bb->i_dominated->GetIterator()));
+  work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin()));
   while (!work_stack.empty()) {
-    const std::pair<BasicBlock*, ArenaBitVector::Iterator*>& curr = work_stack.back();
-    BasicBlock* curr_bb = curr.first;
-    ArenaBitVector::Iterator* curr_idom_iter = curr.second;
-    int bb_idx = curr_idom_iter->Next();
-    while ((bb_idx != -1) && (NeedsVisit(GetBasicBlock(bb_idx)) == NULL)) {
-      bb_idx = curr_idom_iter->Next();
+    std::pair<BasicBlock*, ArenaBitVector::IndexIterator>* curr = &work_stack.back();
+    BasicBlock* curr_bb = curr->first;
+    ArenaBitVector::IndexIterator* curr_idom_iter = &curr->second;
+    while (!curr_idom_iter->Done() && (NeedsVisit(GetBasicBlock(**curr_idom_iter)) == nullptr)) {
+      ++*curr_idom_iter;
     }
-    if (bb_idx != -1) {
-      BasicBlock* new_bb = GetBasicBlock(bb_idx);
+    // NOTE: work_stack.push_back()/pop_back() invalidate curr and curr_idom_iter.
+    if (!curr_idom_iter->Done()) {
+      BasicBlock* new_bb = GetBasicBlock(**curr_idom_iter);
+      ++*curr_idom_iter;
       new_bb->visited = true;
-      work_stack.push_back(
-          std::make_pair(new_bb, new_bb->i_dominated->GetIterator()));
+      work_stack.push_back(std::make_pair(new_bb, new_bb->i_dominated->Indexes().begin()));
     } else {
       // no successor/next
       if (curr_bb->id != NullBasicBlockId) {
@@ -249,11 +243,10 @@
   }
 
   /* Calculate DF_up */
-  BitVectorBlockIterator it(bb->i_dominated, cu_);
-  for (BasicBlock *dominated_bb = it.Next(); dominated_bb != nullptr; dominated_bb = it.Next()) {
-    BitVectorBlockIterator inner_it(dominated_bb->dom_frontier, cu_);
-    for (BasicBlock *df_up_block = inner_it.Next(); df_up_block != nullptr;
-         df_up_block = inner_it.Next()) {
+  for (uint32_t dominated_idx : bb->i_dominated->Indexes()) {
+    BasicBlock *dominated_bb = GetBasicBlock(dominated_idx);
+    for (uint32_t df_up_block_idx : dominated_bb->dom_frontier->Indexes()) {
+      BasicBlock *df_up_block = GetBasicBlock(df_up_block_idx);
       CheckForDominanceFrontier(bb, df_up_block);
     }
   }
@@ -449,7 +442,8 @@
  * insert a phi node if the variable is live-in to the block.
  */
 bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) {
-  ArenaBitVector* temp_dalvik_register_v = temp_dalvik_register_v_;
+  DCHECK_EQ(temp_bit_vector_size_, cu_->num_dalvik_registers);
+  ArenaBitVector* temp_dalvik_register_v = temp_bit_vector_;
 
   if (bb->data_flow_info == NULL) {
     return false;
@@ -487,15 +481,10 @@
 /* Insert phi nodes to for each variable to the dominance frontiers */
 void MIRGraph::InsertPhiNodes() {
   int dalvik_reg;
-  ArenaBitVector* phi_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapPhi);
-  ArenaBitVector* tmp_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapTmpBlocks);
-  ArenaBitVector* input_blocks =
-      new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapInputBlocks);
-
-  temp_dalvik_register_v_ =
-      new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapRegisterV);
+  ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapPhi);
+  ArenaBitVector* input_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
+      temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapInputBlocks);
 
   RepeatingPostOrderDfsIterator iter(this);
   bool change = false;
@@ -505,53 +494,23 @@
 
   /* Iterate through each Dalvik register */
   for (dalvik_reg = cu_->num_dalvik_registers - 1; dalvik_reg >= 0; dalvik_reg--) {
-    bool change;
-
     input_blocks->Copy(def_block_matrix_[dalvik_reg]);
     phi_blocks->ClearAllBits();
-
-    /* Calculate the phi blocks for each Dalvik register */
     do {
-      change = false;
-      tmp_blocks->ClearAllBits();
-      ArenaBitVector::Iterator iterator(input_blocks);
-
-      while (true) {
-        int idx = iterator.Next();
-        if (idx == -1) {
-          break;
-        }
+      // TUNING: When we repeat this, we could skip indexes from the previous pass.
+      for (uint32_t idx : input_blocks->Indexes()) {
         BasicBlock* def_bb = GetBasicBlock(idx);
-
-        /* Merge the dominance frontier to tmp_blocks */
-        // TUNING: hot call to Union().
-        if (def_bb->dom_frontier != NULL) {
-          tmp_blocks->Union(def_bb->dom_frontier);
+        if (def_bb->dom_frontier != nullptr) {
+          phi_blocks->Union(def_bb->dom_frontier);
         }
       }
-      if (!phi_blocks->Equal(tmp_blocks)) {
-        change = true;
-        phi_blocks->Copy(tmp_blocks);
-
-        /*
-         * Iterate through the original blocks plus the new ones in
-         * the dominance frontier.
-         */
-        input_blocks->Copy(phi_blocks);
-        input_blocks->Union(def_block_matrix_[dalvik_reg]);
-      }
-    } while (change);
+    } while (input_blocks->Union(phi_blocks));
 
     /*
      * Insert a phi node for dalvik_reg in the phi_blocks if the Dalvik
      * register is in the live-in set.
      */
-    ArenaBitVector::Iterator iterator(phi_blocks);
-    while (true) {
-      int idx = iterator.Next();
-      if (idx == -1) {
-        break;
-      }
+    for (uint32_t idx : phi_blocks->Indexes()) {
       BasicBlock* phi_bb = GetBasicBlock(idx);
       /* Variable will be clobbered before being used - no need for phi */
       if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 0f16ad2..938c5ec 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -174,28 +174,6 @@
   ComputeLiveInAndLiveOutSets();
 }
 
-class InstructionBitVectorIterator : public ValueObject {
- public:
-  InstructionBitVectorIterator(const BitVector& vector,
-                               const GrowableArray<HInstruction*>& instructions)
-        : instructions_(instructions),
-          iterator_(BitVector::Iterator(&vector)),
-          current_bit_index_(iterator_.Next()) {}
-
-  bool Done() const { return current_bit_index_ == -1; }
-  HInstruction* Current() const { return instructions_.Get(current_bit_index_); }
-  void Advance() {
-    current_bit_index_ = iterator_.Next();
-  }
-
- private:
-  const GrowableArray<HInstruction*> instructions_;
-  BitVector::Iterator iterator_;
-  int32_t current_bit_index_;
-
-  DISALLOW_COPY_AND_ASSIGN(InstructionBitVectorIterator);
-};
-
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
@@ -218,10 +196,9 @@
     }
 
     // Add a range that covers this block to all instructions live_in because of successors.
-    for (InstructionBitVectorIterator it(*live_in, instructions_from_ssa_index_);
-         !it.Done();
-         it.Advance()) {
-      it.Current()->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd());
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* current = instructions_from_ssa_index_.Get(idx);
+      current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd());
     }
 
     for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
@@ -268,11 +245,10 @@
       HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
-      for (InstructionBitVectorIterator it(*live_in, instructions_from_ssa_index_);
-           !it.Done();
-           it.Advance()) {
-        it.Current()->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
-                                                      back_edge->GetLifetimeEnd());
+      for (uint32_t idx : live_in->Indexes()) {
+        HInstruction* current = instructions_from_ssa_index_.Get(idx);
+        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
+                                                 back_edge->GetLifetimeEnd());
       }
     }
   }
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index a3e2b15..7dad55e 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -45,10 +45,11 @@
     storage_size_(storage_size),
     storage_(storage),
     number_of_bits_(start_bits) {
-  DCHECK_EQ(sizeof(*storage_), 4U);  // Assuming 32-bit units.
+  COMPILE_ASSERT(sizeof(*storage_) == kWordBytes, check_word_bytes);
+  COMPILE_ASSERT(sizeof(*storage_) * 8u == kWordBits, check_word_bits);
   if (storage_ == nullptr) {
     storage_size_ = BitsToWords(start_bits);
-    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(*storage_)));
+    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * kWordBytes));
   }
 }
 
@@ -61,7 +62,7 @@
  */
 bool BitVector::IsBitSet(uint32_t num) const {
   // If the index is over the size:
-  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+  if (num >= storage_size_ * kWordBits) {
     // Whether it is expandable or not, this bit does not exist: thus it is not set.
     return false;
   }
@@ -71,7 +72,7 @@
 
 // Mark all bits bit as "clear".
 void BitVector::ClearAllBits() {
-  memset(storage_, 0, storage_size_ * sizeof(*storage_));
+  memset(storage_, 0, storage_size_ * kWordBytes);
 }
 
 // Mark the specified bit as "set".
@@ -80,17 +81,17 @@
  * not using it badly or change resize mechanism.
  */
 void BitVector::SetBit(uint32_t num) {
-  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+  if (num >= storage_size_ * kWordBits) {
     DCHECK(expandable_) << "Attempted to expand a non-expandable bitmap to position " << num;
 
     /* Round up to word boundaries for "num+1" bits */
     uint32_t new_size = BitsToWords(num + 1);
     DCHECK_GT(new_size, storage_size_);
     uint32_t *new_storage =
-        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(*storage_)));
-    memcpy(new_storage, storage_, storage_size_ * sizeof(*storage_));
+        static_cast<uint32_t*>(allocator_->Alloc(new_size * kWordBytes));
+    memcpy(new_storage, storage_, storage_size_ * kWordBytes);
     // Zero out the new storage words.
-    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(*storage_));
+    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * kWordBytes);
     // TOTO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
@@ -103,7 +104,7 @@
 // Mark the specified bit as "unset".
 void BitVector::ClearBit(uint32_t num) {
   // If the index is over the size, we don't have to do anything, it is cleared.
-  if (num < storage_size_ * sizeof(*storage_) * 8) {
+  if (num < storage_size_ * kWordBits) {
     // Otherwise, go ahead and clear it.
     storage_[num >> 5] &= ~check_masks[num & 0x1f];
   }
@@ -132,7 +133,7 @@
   //   - Therefore, min_size goes up to at least that, we are thus comparing at least what we need to, but not less.
   //      ie. we are comparing all storage cells that could have difference, if both vectors have cells above our_highest_index,
   //          they are automatically at 0.
-  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) == 0);
+  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * kWordBytes) == 0);
 }
 
 // Intersect with another bit vector.
@@ -180,7 +181,7 @@
     SetBit(highest_bit);
 
     // Paranoid: storage size should be big enough to hold this bit now.
-    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * kWordBits);
   }
 
   for (uint32_t idx = 0; idx < src_size; idx++) {
@@ -215,7 +216,7 @@
     SetBit(highest_bit);
 
     // Paranoid: storage size should be big enough to hold this bit now.
-    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * kWordBits);
   }
 
   uint32_t not_in_size = not_in->GetStorageSize();
@@ -268,14 +269,10 @@
 
 // Count the number of bits that are set in range [0, end).
 uint32_t BitVector::NumSetBits(uint32_t end) const {
-  DCHECK_LE(end, storage_size_ * sizeof(*storage_) * 8);
+  DCHECK_LE(end, storage_size_ * kWordBits);
   return NumSetBits(storage_, end);
 }
 
-BitVector::Iterator* BitVector::GetIterator() const {
-  return new (allocator_) Iterator(this);
-}
-
 /*
  * Mark specified number of bits as "set". Cannot set all bits like ClearAll
  * since there might be unused bits - setting those to one will confuse the
@@ -329,7 +326,7 @@
       }
 
       // Return cnt + how many storage units still remain * the number of bits per unit.
-      int res = cnt + (idx * (sizeof(*storage_) * 8));
+      int res = cnt + (idx * kWordBits);
       return res;
     }
   }
@@ -369,14 +366,14 @@
   SetBit(highest_bit);
 
   // Now set until highest bit's storage.
-  uint32_t size = 1 + (highest_bit / (sizeof(*storage_) * 8));
-  memcpy(storage_, src->GetRawStorage(), sizeof(*storage_) * size);
+  uint32_t size = 1 + (highest_bit / kWordBits);
+  memcpy(storage_, src->GetRawStorage(), kWordBytes * size);
 
   // Set upper bits to 0.
   uint32_t left = storage_size_ - size;
 
   if (left > 0) {
-    memset(storage_ + size, 0, sizeof(*storage_) * left);
+    memset(storage_ + size, 0, kWordBytes * left);
   }
 }
 
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 2a68396..43e98a7 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -32,59 +32,115 @@
  */
 class BitVector {
   public:
-    class Iterator {
+    class IndexContainer;
+
+    /**
+     * @brief Convenient iterator across the indexes of the BitVector's set bits.
+     *
+     * @details IndexIterator is a Forward iterator (C++11: 24.2.5) from the lowest
+     * to the highest index of the BitVector's set bits. Instances can be retrieved
+     * only through BitVector::Indexes() which returns an IndexContainer wrapper
+     * object with begin() and end() suitable for range-based loops:
+     *   for (uint32_t idx : bit_vector.Indexes()) {
+     *     // Use idx.
+     *   }
+     */
+    class IndexIterator
+        : std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, uint32_t> {
       public:
-        explicit Iterator(const BitVector* bit_vector)
-          : p_bits_(bit_vector),
-            bit_storage_(bit_vector->GetRawStorage()),
-            bit_index_(0),
-            bit_size_(p_bits_->storage_size_ * sizeof(uint32_t) * 8) {}
-
-        // Return the position of the next set bit.  -1 means end-of-element reached.
-        int32_t Next() {
-          // Did anything obviously change since we started?
-          DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8);
-          DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage());
-
-          if (UNLIKELY(bit_index_ >= bit_size_)) {
-            return -1;
-          }
-
-          uint32_t word_index = bit_index_ / 32;
-          uint32_t word = bit_storage_[word_index];
-          // Mask out any bits in the first word we've already considered.
-          word >>= bit_index_ & 0x1f;
-          if (word == 0) {
-            bit_index_ &= ~0x1f;
-            do {
-              word_index++;
-              if (UNLIKELY((word_index * 32) >= bit_size_)) {
-                bit_index_ = bit_size_;
-                return -1;
-              }
-              word = bit_storage_[word_index];
-              bit_index_ += 32;
-            } while (word == 0);
-          }
-          bit_index_ += CTZ(word) + 1;
-          return bit_index_ - 1;
+        bool operator==(const IndexIterator& other) const {
+          DCHECK(bit_storage_ == other.bit_storage_);
+          DCHECK_EQ(storage_size_, other.storage_size_);
+          return bit_index_ == other.bit_index_;
         }
 
-        static void* operator new(size_t size, Allocator* allocator) {
-          return allocator->Alloc(sizeof(BitVector::Iterator));
-        };
-        static void operator delete(void* p) {
-          Iterator* it = reinterpret_cast<Iterator*>(p);
-          it->p_bits_->allocator_->Free(p);
+        bool operator!=(const IndexIterator& other) const {
+          return !(*this == other);
+        }
+
+        int operator*() const{
+          DCHECK_LT(bit_index_, BitSize());
+          return bit_index_;
+        }
+
+        IndexIterator& operator++() {
+          DCHECK_LT(bit_index_, BitSize());
+          bit_index_ = FindIndex(bit_index_ + 1u);
+          return *this;
+        }
+
+        IndexIterator operator++(int) {
+          IndexIterator result(*this);
+          ++*this;
+          return result;
+        }
+
+        // Helper function to check for end without comparing with bit_vector.Indexes().end().
+        bool Done() const {
+          return bit_index_ == BitSize();
         }
 
       private:
-        const BitVector* const p_bits_;
-        const uint32_t* const bit_storage_;
-        uint32_t bit_index_;           // Current index (size in bits).
-        const uint32_t bit_size_;      // Size of vector in bits.
+        struct begin_tag { };
+        struct end_tag { };
 
-        friend class BitVector;
+        IndexIterator(const BitVector* bit_vector, begin_tag)
+          : bit_storage_(bit_vector->GetRawStorage()),
+            storage_size_(bit_vector->storage_size_),
+            bit_index_(FindIndex(0u)) { }
+
+        IndexIterator(const BitVector* bit_vector, end_tag)
+          : bit_storage_(bit_vector->GetRawStorage()),
+            storage_size_(bit_vector->storage_size_),
+            bit_index_(BitSize()) { }
+
+        uint32_t BitSize() const {
+          return storage_size_ * kWordBits;
+        }
+
+        uint32_t FindIndex(uint32_t start_index) const {
+          DCHECK_LE(start_index, BitSize());
+          uint32_t word_index = start_index / kWordBits;
+          if (UNLIKELY(word_index == storage_size_)) {
+            return start_index;
+          }
+          uint32_t word = bit_storage_[word_index];
+          // Mask out any bits in the first word we've already considered.
+          word &= static_cast<uint32_t>(-1) << (start_index & 0x1f);
+          while (word == 0u) {
+            ++word_index;
+            if (UNLIKELY(word_index == storage_size_)) {
+              return BitSize();
+            }
+            word = bit_storage_[word_index];
+          }
+          return word_index * 32u + CTZ(word);
+        }
+
+        const uint32_t* const bit_storage_;
+        const uint32_t storage_size_;  // Size of vector in words.
+        uint32_t bit_index_;           // Current index (size in bits).
+
+        friend class BitVector::IndexContainer;
+    };
+
+    /**
+     * @brief BitVector wrapper class for iteration across indexes of set bits.
+     */
+    class IndexContainer {
+     public:
+      explicit IndexContainer(const BitVector* bit_vector) : bit_vector_(bit_vector) { }
+
+      IndexIterator begin() const {
+        return IndexIterator(bit_vector_, IndexIterator::begin_tag());
+      }
+
+      IndexIterator end() const {
+        return IndexIterator(bit_vector_, IndexIterator::end_tag());
+      }
+
+     private:
+      const BitVector* const bit_vector_;
     };
 
     BitVector(uint32_t start_bits,
@@ -127,14 +183,16 @@
     // Number of bits set in range [0, end).
     uint32_t NumSetBits(uint32_t end) const;
 
-    Iterator* GetIterator() const;
+    IndexContainer Indexes() const {
+      return IndexContainer(this);
+    }
 
     uint32_t GetStorageSize() const { return storage_size_; }
     bool IsExpandable() const { return expandable_; }
     uint32_t GetRawStorageWord(size_t idx) const { return storage_[idx]; }
     uint32_t* GetRawStorage() { return storage_; }
     const uint32_t* GetRawStorage() const { return storage_; }
-    size_t GetSizeOf() const { return storage_size_ * sizeof(uint32_t); }
+    size_t GetSizeOf() const { return storage_size_ * kWordBytes; }
 
     /**
      * @return the highest bit set, -1 if none are set
@@ -155,6 +213,9 @@
     void DumpHelper(std::ostringstream& buffer, const char* prefix) const;
 
   private:
+    static constexpr uint32_t kWordBytes = sizeof(uint32_t);
+    static constexpr uint32_t kWordBits = kWordBytes * 8;
+
     Allocator* const allocator_;
     const bool expandable_;         // expand bitmap if we run out?
     uint32_t   storage_size_;       // current size, in 32-bit words.
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index 0f866a4..1403f50 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -38,11 +38,8 @@
   EXPECT_EQ(0U, bv.GetRawStorageWord(0));
   EXPECT_EQ(0U, *bv.GetRawStorage());
 
-  BitVector::Iterator empty_iterator(&bv);
-  EXPECT_EQ(-1, empty_iterator.Next());
-
-  std::unique_ptr<BitVector::Iterator> empty_iterator_on_heap(bv.GetIterator());
-  EXPECT_EQ(-1, empty_iterator_on_heap->Next());
+  EXPECT_TRUE(bv.Indexes().begin().Done());
+  EXPECT_TRUE(bv.Indexes().begin() == bv.Indexes().end());
 
   bv.SetBit(0);
   bv.SetBit(kBits - 1);
@@ -57,10 +54,14 @@
   EXPECT_EQ(0x80000001U, bv.GetRawStorageWord(0));
   EXPECT_EQ(0x80000001U, *bv.GetRawStorage());
 
-  BitVector::Iterator iterator(&bv);
-  EXPECT_EQ(0, iterator.Next());
-  EXPECT_EQ(static_cast<int>(kBits - 1), iterator.Next());
-  EXPECT_EQ(-1, iterator.Next());
+  BitVector::IndexIterator iterator = bv.Indexes().begin();
+  EXPECT_TRUE(iterator != bv.Indexes().end());
+  EXPECT_EQ(0, *iterator);
+  ++iterator;
+  EXPECT_TRUE(iterator != bv.Indexes().end());
+  EXPECT_EQ(static_cast<int>(kBits - 1), *iterator);
+  ++iterator;
+  EXPECT_TRUE(iterator == bv.Indexes().end());
 }
 
 TEST(BitVector, NoopAllocator) {