56 files changed, 2423 insertions, 876 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index e069d88f19..498f7ef11b 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -25,6 +25,7 @@ TEST_COMMON_SRC_FILES := \ runtime/barrier_test.cc \ runtime/base/histogram_test.cc \ runtime/base/mutex_test.cc \ + runtime/base/timing_logger_test.cc \ runtime/base/unix_file/fd_file_test.cc \ runtime/base/unix_file/mapped_file_test.cc \ runtime/base/unix_file/null_file_test.cc \ @@ -60,7 +61,10 @@ TEST_COMMON_SRC_FILES := \ ifeq ($(ART_SEA_IR_MODE),true) TEST_COMMON_SRC_FILES += \ - compiler/utils/scoped_hashtable_test.cc + compiler/utils/scoped_hashtable_test.cc \ + compiler/sea_ir/types/type_data_test.cc \ + compiler/sea_ir/types/type_inference_visitor_test.cc \ + compiler/sea_ir/ir/regions_test.cc endif TEST_TARGET_SRC_FILES := \ diff --git a/compiler/Android.mk b/compiler/Android.mk index f81b460ee4..5caf688d29 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -94,9 +94,12 @@ LIBART_COMPILER_SRC_FILES := \ ifeq ($(ART_SEA_IR_MODE),true) LIBART_COMPILER_SRC_FILES += \ sea_ir/frontend.cc \ - sea_ir/instruction_tools.cc \ - sea_ir/sea.cc \ - sea_ir/code_gen.cc + sea_ir/ir/instruction_tools.cc \ + sea_ir/ir/sea.cc \ + sea_ir/code_gen/code_gen.cc \ + sea_ir/types/type_inference.cc \ + sea_ir/types/type_inference_visitor.cc \ + sea_ir/debug/dot_gen.cc endif LIBART_COMPILER_CFLAGS := diff --git a/compiler/sea_ir/code_gen.cc b/compiler/sea_ir/code_gen/code_gen.cc index a513907b38..cb150e58fa 100644 --- a/compiler/sea_ir/code_gen.cc +++ b/compiler/sea_ir/code_gen/code_gen.cc @@ -15,8 +15,8 @@ */ #include <llvm/Support/raw_ostream.h> -#include "sea.h" -#include "code_gen.h" +#include "sea_ir/ir/sea.h" +#include "sea_ir/code_gen/code_gen.h" namespace sea_ir { @@ -114,6 +114,14 @@ void CodeGenVisitor::Visit(InstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); DCHECK(0); // This whole function is useful only during development. 
} + +void CodeGenVisitor::Visit(UnnamedConstInstructionNode* instruction) { + std::string instr = instruction->GetInstruction()->DumpString(NULL); + std::cout << "1.Instruction: " << instr << std::endl; + llvm_data_->AddValue(instruction, + llvm::ConstantInt::get(*llvm_data_->context_, llvm::APInt(32, instruction->GetConstValue()))); +} + void CodeGenVisitor::Visit(ConstInstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); std::cout << "1.Instruction: " << instr << std::endl; @@ -123,14 +131,14 @@ void CodeGenVisitor::Visit(ConstInstructionNode* instruction) { void CodeGenVisitor::Visit(ReturnInstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); std::cout << "2.Instruction: " << instr << std::endl; - DCHECK_GT(instruction->GetSSAUses().size(), 0u); - llvm::Value* return_value = llvm_data_->GetValue(instruction->GetSSAUses().at(0)); + DCHECK_GT(instruction->GetSSAProducers().size(), 0u); + llvm::Value* return_value = llvm_data_->GetValue(instruction->GetSSAProducers().at(0)); llvm_data_->builder_.CreateRet(return_value); } void CodeGenVisitor::Visit(IfNeInstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); std::cout << "3.Instruction: " << instr << std::endl; - std::vector<InstructionNode*> ssa_uses = instruction->GetSSAUses(); + std::vector<InstructionNode*> ssa_uses = instruction->GetSSAProducers(); DCHECK_GT(ssa_uses.size(), 1u); InstructionNode* use_l = ssa_uses.at(0); llvm::Value* left = llvm_data_->GetValue(use_l); @@ -171,7 +179,7 @@ void CodeGenVisitor::Visit(MoveResultInstructionNode* instruction) { // since their purpose of minimizing the number of opcodes in dex is // not relevant for the IR. (Will need to have different // instruction subclasses for functions and procedures.) - std::vector<InstructionNode*> ssa_uses = instruction->GetSSAUses(); + std::vector<InstructionNode*> ssa_uses = instruction->GetSSAProducers(); InstructionNode* use_l = ssa_uses.at(0); llvm::Value* left = llvm_data_->GetValue(use_l); llvm::Value* right = llvm::ConstantInt::get(*llvm_data_->context_, llvm::APInt(32, 0)); @@ -187,7 +195,7 @@ void CodeGenVisitor::Visit(InvokeStaticInstructionNode* invoke) { // TODO: Add proper checking of the matching between formal and actual signature. DCHECK(NULL != callee); std::vector<llvm::Value*> parameter_values; - std::vector<InstructionNode*> parameter_sources = invoke->GetSSAUses(); + std::vector<InstructionNode*> parameter_sources = invoke->GetSSAProducers(); for (std::vector<InstructionNode*>::const_iterator cit = parameter_sources.begin(); cit != parameter_sources.end(); ++cit) { llvm::Value* parameter_value = llvm_data_->GetValue((*cit)); @@ -201,7 +209,7 @@ void CodeGenVisitor::Visit(InvokeStaticInstructionNode* invoke) { void CodeGenVisitor::Visit(AddIntInstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); std::cout << "7.Instruction: " << instr << std::endl; - std::vector<InstructionNode*> ssa_uses = instruction->GetSSAUses(); + std::vector<InstructionNode*> ssa_uses = instruction->GetSSAProducers(); DCHECK_GT(ssa_uses.size(), 1u); InstructionNode* use_l = ssa_uses.at(0); InstructionNode* use_r = ssa_uses.at(1); @@ -221,7 +229,7 @@ void CodeGenVisitor::Visit(GotoInstructionNode* instruction) { void CodeGenVisitor::Visit(IfEqzInstructionNode* instruction) { std::string instr = instruction->GetInstruction()->DumpString(NULL); std::cout << "9. 
Instruction: " << instr << "; Id: " <<instruction << std::endl; - std::vector<InstructionNode*> ssa_uses = instruction->GetSSAUses(); + std::vector<InstructionNode*> ssa_uses = instruction->GetSSAProducers(); DCHECK_GT(ssa_uses.size(), 0u); InstructionNode* use_l = ssa_uses.at(0); llvm::Value* left = llvm_data_->GetValue(use_l); diff --git a/compiler/sea_ir/code_gen.h b/compiler/sea_ir/code_gen/code_gen.h index aba8d5c7f8..b1bc4dc2da 100644 --- a/compiler/sea_ir/code_gen.h +++ b/compiler/sea_ir/code_gen/code_gen.h @@ -14,14 +14,15 @@ * limitations under the License. */ -#ifndef ART_COMPILER_SEA_IR_CODE_GEN_H_ -#define ART_COMPILER_SEA_IR_CODE_GEN_H_ +#ifndef ART_COMPILER_SEA_IR_CODE_GEN_CODE_GEN_H_ +#define ART_COMPILER_SEA_IR_CODE_GEN_CODE_GEN_H_ +#include "llvm/Analysis/Verifier.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Analysis/Verifier.h" -#include "visitor.h" +#include "sea_ir/ir/visitor.h" namespace sea_ir { // Abstracts away the containers we use to map SEA IR objects to LLVM IR objects. @@ -100,6 +101,8 @@ class CodeGenPrepassVisitor: public CodeGenPassVisitor { void Visit(SignatureNode* region); void Visit(Region* region); void Visit(InstructionNode* instruction) { } + + void Visit(UnnamedConstInstructionNode* instruction) { } void Visit(ConstInstructionNode* instruction) { } void Visit(ReturnInstructionNode* instruction) { } void Visit(IfNeInstructionNode* instruction) { } @@ -119,6 +122,7 @@ class CodeGenPostpassVisitor: public CodeGenPassVisitor { void Visit(SignatureNode* region); void Visit(Region* region); void Visit(InstructionNode* region) { } + void Visit(UnnamedConstInstructionNode* instruction) { } void Visit(ConstInstructionNode* instruction) { } void Visit(ReturnInstructionNode* instruction) { } void Visit(IfNeInstructionNode* instruction) { } @@ -138,10 +142,10 @@ class CodeGenVisitor: public CodeGenPassVisitor { void Visit(SignatureNode* region); void Visit(Region* region); void Visit(InstructionNode* region); + void Visit(UnnamedConstInstructionNode* instruction); void Visit(ConstInstructionNode* instruction); void Visit(ReturnInstructionNode* instruction); void Visit(IfNeInstructionNode* instruction); - // void Visit(AddIntLitInstructionNode* instruction); void Visit(MoveResultInstructionNode* instruction); void Visit(InvokeStaticInstructionNode* instruction); void Visit(AddIntInstructionNode* instruction); @@ -150,4 +154,4 @@ class CodeGenVisitor: public CodeGenPassVisitor { void Visit(PhiInstructionNode* region) { } }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_CODE_GEN_H_ +#endif // ART_COMPILER_SEA_IR_CODE_GEN_CODE_GEN_H_ diff --git a/compiler/sea_ir/debug/dot_gen.cc b/compiler/sea_ir/debug/dot_gen.cc new file mode 100644 index 0000000000..9442684a52 --- /dev/null +++ b/compiler/sea_ir/debug/dot_gen.cc @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include "scoped_thread_state_change.h" +#include "sea_ir/debug/dot_gen.h" + +namespace sea_ir { + +void DotGenerationVisitor::Initialize(SeaGraph* graph) { + graph_ = graph; + Region* root_region; + ordered_regions_.clear(); + for (std::vector<Region*>::const_iterator cit = graph->GetRegions()->begin(); + cit != graph->GetRegions()->end(); cit++ ) { + if ((*cit)->GetIDominator() == (*cit)) { + root_region = *cit; + } + } + ordered_regions_.push_back(root_region); + for (unsigned int id = 0; id < ordered_regions_.size(); id++) { + Region* current_region = ordered_regions_.at(id); + const std::set<Region*>* dominated_regions = current_region->GetIDominatedSet(); + for (std::set<Region*>::const_iterator cit = dominated_regions->begin(); + cit != dominated_regions->end(); cit++ ) { + ordered_regions_.push_back(*cit); + } + } +} + +void DotGenerationVisitor::ToDotSSAEdges(InstructionNode* instruction) { + std::map<int, InstructionNode*>* definition_edges = instruction->GetSSAProducersMap(); + // SSA definitions: + for (std::map<int, InstructionNode*>::const_iterator + def_it = definition_edges->begin(); + def_it != definition_edges->end(); def_it++) { + if (NULL != def_it->second) { + dot_text_ += def_it->second->StringId() + " -> "; + dot_text_ += instruction->StringId() + "[color=gray,label=\""; + dot_text_ += art::StringPrintf("vR = %d", def_it->first); + art::SafeMap<int, const Type*>::const_iterator type_it = types_->find(def_it->second->Id()); + if (type_it != types_->end()) { + art::ScopedObjectAccess soa(art::Thread::Current()); + dot_text_ += "(" + type_it->second->Dump() + ")"; + } else { + dot_text_ += "()"; + } + dot_text_ += "\"] ; // SSA edge\n"; + } + } + + // SSA used-by: + if (options_->WillSaveUseEdges()) { + std::vector<InstructionNode*>* used_in = instruction->GetSSAConsumers(); + for (std::vector<InstructionNode*>::const_iterator cit = used_in->begin(); + cit != used_in->end(); cit++) { + dot_text_ += (*cit)->StringId() + " -> " + instruction->StringId() + "[color=gray,label=\""; + dot_text_ += "\"] ; // SSA used-by edge\n"; + } + } +} + +void DotGenerationVisitor::ToDotSSAEdges(PhiInstructionNode* instruction) { + std::vector<InstructionNode*> definition_edges = instruction->GetSSAProducers(); + // SSA definitions: + for (std::vector<InstructionNode*>::const_iterator + def_it = definition_edges.begin(); + def_it != definition_edges.end(); def_it++) { + if (NULL != *def_it) { + dot_text_ += (*def_it)->StringId() + " -> "; + dot_text_ += instruction->StringId() + "[color=gray,label=\""; + dot_text_ += art::StringPrintf("vR = %d", instruction->GetRegisterNumber()); + art::SafeMap<int, const Type*>::const_iterator type_it = types_->find((*def_it)->Id()); + if (type_it != types_->end()) { + art::ScopedObjectAccess soa(art::Thread::Current()); + dot_text_ += "(" + type_it->second->Dump() + ")"; + } else { + dot_text_ += "()"; + } + dot_text_ += "\"] ; // SSA edge\n"; + } + } + + // SSA used-by: + if (options_->WillSaveUseEdges()) { + std::vector<InstructionNode*>* used_in = instruction->GetSSAConsumers(); + for (std::vector<InstructionNode*>::const_iterator cit = used_in->begin(); + cit != used_in->end(); cit++) { + dot_text_ += (*cit)->StringId() + " -> " + instruction->StringId() + "[color=gray,label=\""; + dot_text_ += "\"] ; // SSA used-by edge\n"; + } + } +} + +void DotGenerationVisitor::Visit(SignatureNode* parameter) { + dot_text_ += parameter->StringId() +" [label=\"[" + parameter->StringId() + "] signature:"; + dot_text_ += art::StringPrintf("r%d", 
parameter->GetResultRegister()); + dot_text_ += "\"] // signature node\n"; + ToDotSSAEdges(parameter); +} + +// Appends to @result a dot language formatted string representing the node and +// (by convention) outgoing edges, so that the composition of theToDot() of all nodes +// builds a complete dot graph (without prolog and epilog though). +void DotGenerationVisitor::Visit(Region* region) { + dot_text_ += "\n// Region: \nsubgraph " + region->StringId(); + dot_text_ += " { label=\"region " + region->StringId() + "(rpo="; + dot_text_ += art::StringPrintf("%d", region->GetRPO()); + if (NULL != region->GetIDominator()) { + dot_text_ += " dom=" + region->GetIDominator()->StringId(); + } + dot_text_ += ")\";\n"; + + std::vector<PhiInstructionNode*>* phi_instructions = region->GetPhiNodes(); + for (std::vector<PhiInstructionNode*>::const_iterator cit = phi_instructions->begin(); + cit != phi_instructions->end(); cit++) { + dot_text_ += (*cit)->StringId() +";\n"; + } + std::vector<InstructionNode*>* instructions = region->GetInstructions(); + for (std::vector<InstructionNode*>::const_iterator cit = instructions->begin(); + cit != instructions->end(); cit++) { + dot_text_ += (*cit)->StringId() +";\n"; + } + + dot_text_ += "} // End Region.\n"; + std::vector<Region*>* successors = region->GetSuccessors(); + for (std::vector<Region*>::const_iterator cit = successors->begin(); cit != successors->end(); + cit++) { + DCHECK(NULL != *cit) << "Null successor found for SeaNode" << + region->GetLastChild()->StringId() << "."; + dot_text_ += region->GetLastChild()->StringId() + " -> " + + (*cit)->GetLastChild()->StringId() + + "[lhead=" + (*cit)->StringId() + ", " + "ltail=" + region->StringId() + "];\n\n"; + } +} +void DotGenerationVisitor::Visit(InstructionNode* instruction) { + dot_text_ += "// Instruction ("+instruction->StringId()+"): \n" + instruction->StringId() + + " [label=\"[" + instruction->StringId() + "] " + + instruction->GetInstruction()->DumpString(graph_->GetDexFile()) + "\""; + dot_text_ += "];\n"; + ToDotSSAEdges(instruction); +} + +void DotGenerationVisitor::Visit(UnnamedConstInstructionNode* instruction) { + dot_text_ += "// Instruction ("+instruction->StringId()+"): \n" + instruction->StringId() + + " [label=\"[" + instruction->StringId() + "] const/x v-3, #" + + art::StringPrintf("%d", instruction->GetConstValue()) + "\""; + dot_text_ += "];\n"; + ToDotSSAEdges(instruction); +} + +void DotGenerationVisitor::Visit(PhiInstructionNode* phi) { + dot_text_ += "// PhiInstruction: \n" + phi->StringId() + + " [label=\"[" + phi->StringId() + "] PHI("; + dot_text_ += art::StringPrintf("%d", phi->GetRegisterNumber()); + dot_text_ += ")\""; + dot_text_ += "];\n"; + ToDotSSAEdges(phi); +} +} // namespace sea_ir diff --git a/compiler/sea_ir/debug/dot_gen.h b/compiler/sea_ir/debug/dot_gen.h new file mode 100644 index 0000000000..675d83d515 --- /dev/null +++ b/compiler/sea_ir/debug/dot_gen.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_SEA_IR_DEBUG_DOT_GEN_H_ +#define ART_COMPILER_SEA_IR_DEBUG_DOT_GEN_H_ + +#include "safe_map.h" +#include "base/stringprintf.h" +#include "file_output_stream.h" +#include "sea_ir/ir/sea.h" +#include "sea_ir/types/type_inference.h" + +namespace sea_ir { + +class DotConversionOptions { + public: + DotConversionOptions(): save_use_edges_(false) { } + bool WillSaveUseEdges() const { + return save_use_edges_; + } + private: + bool save_use_edges_; +}; + +class DotGenerationVisitor: public IRVisitor { + public: + explicit DotGenerationVisitor(const DotConversionOptions* const options, + art::SafeMap<int, const Type*>* types): graph_(), types_(types), options_(options) { } + + virtual void Initialize(SeaGraph* graph); + // Saves the ssa def->use edges corresponding to @instruction. + void ToDotSSAEdges(InstructionNode* instruction); + void ToDotSSAEdges(PhiInstructionNode* instruction); + void Visit(SeaGraph* graph) { + dot_text_ += "digraph seaOfNodes {\ncompound=true\n"; + } + void Visit(SignatureNode* parameter); + + // Appends to @result a dot language formatted string representing the node and + // (by convention) outgoing edges, so that the composition of theToDot() of all nodes + // builds a complete dot graph (without prolog and epilog though). + void Visit(Region* region); + void Visit(InstructionNode* instruction); + void Visit(PhiInstructionNode* phi); + void Visit(UnnamedConstInstructionNode* instruction); + + void Visit(ConstInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(ReturnInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(IfNeInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(MoveResultInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(InvokeStaticInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(AddIntInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(GotoInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + void Visit(IfEqzInstructionNode* instruction) { + Visit(reinterpret_cast<InstructionNode*>(instruction)); + } + + std::string GetResult() const { + return dot_text_; + } + + private: + std::string dot_text_; + SeaGraph* graph_; + art::SafeMap<int, const Type*>* types_; + const DotConversionOptions* const options_; +}; + +// Stores options for turning a SEA IR graph to a .dot file. +class DotConversion { + public: + DotConversion(): options_() { } + // Saves to @filename the .dot representation of @graph with the options @options. 
+ void DumpSea(SeaGraph* graph, std::string filename, + art::SafeMap<int, const Type*>* types) const { + LOG(INFO) << "Starting to write SEA string to file."; + DotGenerationVisitor dgv = DotGenerationVisitor(&options_, types); + graph->Accept(&dgv); + art::File* file = art::OS::OpenFile(filename.c_str(), true, true); + art::FileOutputStream fos(file); + std::string graph_as_string = dgv.GetResult(); + graph_as_string += "}"; + fos.WriteFully(graph_as_string.c_str(), graph_as_string.size()); + LOG(INFO) << "Written SEA string to file."; + } + + private: + DotConversionOptions options_; +}; + +} // namespace sea_ir +#endif // ART_COMPILER_SEA_IR_DEBUG_DOT_GEN_H_ diff --git a/compiler/sea_ir/frontend.cc b/compiler/sea_ir/frontend.cc index 5843388c42..e24d07d50c 100644 --- a/compiler/sea_ir/frontend.cc +++ b/compiler/sea_ir/frontend.cc @@ -23,14 +23,17 @@ #include "llvm/llvm_compilation_unit.h" #include "mirror/object.h" #include "runtime.h" -#include "sea_ir/sea.h" +#include "safe_map.h" +#include "sea_ir/ir/sea.h" +#include "sea_ir/debug/dot_gen.h" +#include "sea_ir/types/types.h" namespace art { static CompiledMethod* CompileMethodWithSeaIr(CompilerDriver& compiler, const CompilerBackend compiler_backend, const DexFile::CodeItem* code_item, - uint32_t access_flags, InvokeType invoke_type, + uint32_t method_access_flags, InvokeType invoke_type, uint32_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file #if defined(ART_USE_PORTABLE_COMPILER) @@ -40,9 +43,11 @@ static CompiledMethod* CompileMethodWithSeaIr(CompilerDriver& compiler, // NOTE: Instead of keeping the convention from the Dalvik frontend.cc // and silencing the cpplint.py warning, I just corrected the formatting. VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "..."; - sea_ir::SeaGraph* sg = sea_ir::SeaGraph::GetCurrentGraph(dex_file); - sg->CompileMethod(code_item, class_def_idx, method_idx, dex_file); - sg->DumpSea("/tmp/temp.dot"); + sea_ir::SeaGraph* ir_graph = sea_ir::SeaGraph::GetGraph(dex_file); + ir_graph->CompileMethod(code_item, class_def_idx, method_idx, method_access_flags, dex_file); + sea_ir::DotConversion dc; + SafeMap<int, const sea_ir::Type*>* types = ir_graph->ti_->GetTypeMap(); + dc.DumpSea(ir_graph, "/tmp/temp.dot", types); CHECK(0 && "No SEA compiled function exists yet."); return NULL; } @@ -50,14 +55,14 @@ static CompiledMethod* CompileMethodWithSeaIr(CompilerDriver& compiler, CompiledMethod* SeaIrCompileOneMethod(CompilerDriver& compiler, const CompilerBackend backend, const DexFile::CodeItem* code_item, - uint32_t access_flags, + uint32_t method_access_flags, InvokeType invoke_type, uint32_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file, llvm::LlvmCompilationUnit* llvm_compilation_unit) { - return CompileMethodWithSeaIr(compiler, backend, code_item, access_flags, invoke_type, + return CompileMethodWithSeaIr(compiler, backend, code_item, method_access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file #if defined(ART_USE_PORTABLE_COMPILER) , llvm_compilation_unit @@ -68,13 +73,13 @@ CompiledMethod* SeaIrCompileOneMethod(CompilerDriver& compiler, extern "C" art::CompiledMethod* SeaIrCompileMethod(art::CompilerDriver& compiler, const art::DexFile::CodeItem* code_item, - uint32_t access_flags, art::InvokeType invoke_type, + uint32_t method_access_flags, art::InvokeType invoke_type, uint32_t class_def_idx, uint32_t method_idx, jobject class_loader, const art::DexFile& dex_file) { // TODO: Check method 
fingerprint here to determine appropriate backend type. // Until then, use build default art::CompilerBackend backend = compiler.GetCompilerBackend(); - return art::SeaIrCompileOneMethod(compiler, backend, code_item, access_flags, invoke_type, + return art::SeaIrCompileOneMethod(compiler, backend, code_item, method_access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file, NULL /* use thread llvm_info */); } diff --git a/compiler/sea_ir/instruction_nodes.h b/compiler/sea_ir/ir/instruction_nodes.h index 6f9bdddf77..906a10fe27 100644 --- a/compiler/sea_ir/instruction_nodes.h +++ b/compiler/sea_ir/ir/instruction_nodes.h @@ -14,11 +14,12 @@ * limitations under the License. */ -#ifndef ART_COMPILER_SEA_IR_INSTRUCTION_NODES_H_ -#define ART_COMPILER_SEA_IR_INSTRUCTION_NODES_H_ -#include "sea_node.h" -#include "visitor.h" +#ifndef ART_COMPILER_SEA_IR_IR_INSTRUCTION_NODES_H_ +#define ART_COMPILER_SEA_IR_IR_INSTRUCTION_NODES_H_ #include "dex_instruction-inl.h" +#include "sea_ir/ir/sea_node.h" +#include "sea_ir/ir/visitor.h" + namespace sea_ir { @@ -48,9 +49,7 @@ class InstructionNode: public SeaNode { // Returns the set of registers defined by the current instruction. virtual std::vector<int> GetDefinitions() const; // Returns the set of register numbers that are used by the instruction. - virtual std::vector<int> GetUses(); - // Appends to @result the .dot string representation of the instruction. - virtual void ToDot(std::string& result, const art::DexFile& dex_file) const; + virtual std::vector<int> GetUses() const; // Mark the current instruction as a downward exposed definition. void MarkAsDEDef(); // Rename the use of @reg_no to refer to the instruction @definition, @@ -61,7 +60,7 @@ class InstructionNode: public SeaNode { } // Returns the ordered set of Instructions that define the input operands of this instruction. // Precondition: SeaGraph.ConvertToSSA(). - std::vector<InstructionNode*> GetSSAUses() { + virtual std::vector<InstructionNode*> GetSSAProducers() { std::vector<int> uses = GetUses(); std::vector<InstructionNode*> ssa_uses; for (std::vector<int>::const_iterator cit = uses.begin(); cit != uses.end(); cit++) { @@ -69,11 +68,15 @@ class InstructionNode: public SeaNode { } return ssa_uses; } - + std::map<int, InstructionNode* >* GetSSAProducersMap() { + return &definition_edges_; + } + std::vector<InstructionNode*>* GetSSAConsumers() { + return &used_in_; + } virtual void AddSSAUse(InstructionNode* use) { used_in_.push_back(use); } - void Accept(IRVisitor* v) { v->Visit(this); v->Traverse(this); @@ -91,11 +94,10 @@ class InstructionNode: public SeaNode { protected: explicit InstructionNode(const art::Instruction* in): SeaNode(), instruction_(in), used_in_(), de_def_(false), region_(NULL) { } - void ToDotSSAEdges(std::string& result) const; protected: const art::Instruction* const instruction_; - std::map<int, InstructionNode* > definition_edges_; + std::map<int, InstructionNode* > definition_edges_; // Maps used registers to their definitions. // Stores pointers to instructions that use the result of the current instruction. 
std::vector<InstructionNode*> used_in_; bool de_def_; @@ -121,6 +123,7 @@ class UnnamedConstInstructionNode: public ConstInstructionNode { public: explicit UnnamedConstInstructionNode(const art::Instruction* inst, int32_t value): ConstInstructionNode(inst), value_(value) { } + void Accept(IRVisitor* v) { v->Visit(this); v->Traverse(this); @@ -134,19 +137,6 @@ class UnnamedConstInstructionNode: public ConstInstructionNode { return value_; } - void ToDot(std::string& result, const art::DexFile& dex_file) const { - std::ostringstream sstream; - sstream << GetConstValue(); - const std::string value_as_string(sstream.str()); - result += "// Instruction ("+StringId()+"): \n" + StringId() + - " [label=\"const/x v-3, #"+ value_as_string + "\""; - if (de_def_) { - result += "style=bold"; - } - result += "];\n"; - ToDotSSAEdges(result); - } - private: const int32_t value_; }; @@ -176,7 +166,7 @@ class IfNeInstructionNode: public InstructionNode { class MoveResultInstructionNode: public InstructionNode { public: explicit MoveResultInstructionNode(const art::Instruction* inst): InstructionNode(inst) { } - std::vector<int> GetUses() { + std::vector<int> GetUses() const { std::vector<int> uses; // Using vector<> instead of set<> because order matters. uses.push_back(RETURN_REGISTER); return uses; @@ -213,7 +203,7 @@ class AddIntLitInstructionNode: public AddIntInstructionNode { explicit AddIntLitInstructionNode(const art::Instruction* inst): AddIntInstructionNode(inst) { } - std::vector<int> GetUses() { + std::vector<int> GetUses() const { std::vector<int> uses = AddIntInstructionNode::GetUses(); uses.push_back(UNNAMED_CONST_REGISTER); return uses; @@ -245,4 +235,4 @@ class IfEqzInstructionNode: public InstructionNode { } }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_INSTRUCTION_NODES_H_ +#endif // ART_COMPILER_SEA_IR_IR_INSTRUCTION_NODES_H_ diff --git a/compiler/sea_ir/instruction_tools.cc b/compiler/sea_ir/ir/instruction_tools.cc index 9627497805..143209de75 100644 --- a/compiler/sea_ir/instruction_tools.cc +++ b/compiler/sea_ir/ir/instruction_tools.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "instruction_tools.h" +#include "sea_ir/ir/instruction_tools.h" namespace sea_ir { diff --git a/compiler/sea_ir/instruction_tools.h b/compiler/sea_ir/ir/instruction_tools.h index d3871008a4..895e01732a 100644 --- a/compiler/sea_ir/instruction_tools.h +++ b/compiler/sea_ir/ir/instruction_tools.h @@ -17,8 +17,8 @@ #include "sea.h" #include "dex_instruction.h" -#ifndef ART_COMPILER_SEA_IR_INSTRUCTION_TOOLS_H_ -#define ART_COMPILER_SEA_IR_INSTRUCTION_TOOLS_H_ +#ifndef ART_COMPILER_SEA_IR_IR_INSTRUCTION_TOOLS_H_ +#define ART_COMPILER_SEA_IR_IR_INSTRUCTION_TOOLS_H_ // Note: This file has content cannibalized for SEA_IR from the MIR implementation, @@ -122,4 +122,4 @@ class InstructionTools { static const int instruction_attributes_[]; }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_INSTRUCTION_TOOLS_H_ +#endif // ART_COMPILER_SEA_IR_IR_INSTRUCTION_TOOLS_H_ diff --git a/compiler/sea_ir/ir/regions_test.cc b/compiler/sea_ir/ir/regions_test.cc new file mode 100644 index 0000000000..9813465db8 --- /dev/null +++ b/compiler/sea_ir/ir/regions_test.cc @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common_test.h" +#include "sea_ir/ir/sea.h" + +using utils::ScopedHashtable; + +namespace sea_ir { + +class RegionsTest : public art::CommonTest { +}; + +TEST_F(RegionsTest, Basics) { + sea_ir::SeaGraph sg(*java_lang_dex_file_); + sea_ir::Region* root = sg.GetNewRegion(); + sea_ir::Region* then_region = sg.GetNewRegion(); + sea_ir::Region* else_region = sg.GetNewRegion(); + std::vector<sea_ir::Region*>* regions = sg.GetRegions(); + // Test that regions have been registered correctly as children of the graph. + EXPECT_TRUE(std::find(regions->begin(), regions->end(), root) != regions->end()); + EXPECT_TRUE(std::find(regions->begin(), regions->end(), then_region) != regions->end()); + EXPECT_TRUE(std::find(regions->begin(), regions->end(), else_region) != regions->end()); + // Check that an edge recorded correctly in both the head and the tail. + sg.AddEdge(root, then_region); + std::vector<sea_ir::Region*>* succs = root->GetSuccessors(); + EXPECT_EQ(1U, succs->size()); + EXPECT_EQ(then_region, succs->at(0)); + std::vector<sea_ir::Region*>* preds = then_region->GetPredecessors(); + EXPECT_EQ(1U, preds->size()); + EXPECT_EQ(root, preds->at(0)); + // Check that two edges are recorded properly for both head and tail. + sg.AddEdge(root, else_region); + succs = root->GetSuccessors(); + EXPECT_EQ(2U, succs->size()); + EXPECT_TRUE(std::find(succs->begin(), succs->end(), then_region) != succs->end()); + EXPECT_TRUE(std::find(succs->begin(), succs->end(), else_region) != succs->end()); + preds = then_region->GetPredecessors(); + EXPECT_EQ(1U, preds->size()); + EXPECT_EQ(root, preds->at(0)); + preds = else_region->GetPredecessors(); + EXPECT_EQ(1U, preds->size()); + EXPECT_EQ(root, preds->at(0)); +} + +} // namespace art diff --git a/compiler/sea_ir/sea.cc b/compiler/sea_ir/ir/sea.cc index 99b21f8771..08fe0e1fac 100644 --- a/compiler/sea_ir/sea.cc +++ b/compiler/sea_ir/ir/sea.cc @@ -14,10 +14,10 @@ * limitations under the License. 
*/ #include "base/stringprintf.h" -#include "file_output_stream.h" -#include "instruction_tools.h" -#include "sea.h" -#include "code_gen.h" +#include "sea_ir/ir/instruction_tools.h" +#include "sea_ir/ir/sea.h" +#include "sea_ir/code_gen/code_gen.h" +#include "sea_ir/types/type_inference.h" #define MAX_REACHING_DEF_ITERERATIONS (10) // TODO: When development is done, this define should not @@ -35,7 +35,6 @@ void IRVisitor::Traverse(Region* region) { cit != phis->end(); cit++) { (*cit)->Accept(this); } - std::vector<InstructionNode*>* instructions = region->GetInstructions(); for (std::vector<InstructionNode*>::const_iterator cit = instructions->begin(); cit != instructions->end(); cit++) { @@ -50,24 +49,10 @@ void IRVisitor::Traverse(SeaGraph* graph) { } } -SeaGraph* SeaGraph::GetCurrentGraph(const art::DexFile& dex_file) { +SeaGraph* SeaGraph::GetGraph(const art::DexFile& dex_file) { return new SeaGraph(dex_file); } -void SeaGraph::DumpSea(std::string filename) const { - LOG(INFO) << "Starting to write SEA string to file."; - std::string result; - result += "digraph seaOfNodes {\ncompound=true\n"; - for (std::vector<Region*>::const_iterator cit = regions_.begin(); cit != regions_.end(); cit++) { - (*cit)->ToDot(result, dex_file_); - } - result += "}\n"; - art::File* file = art::OS::OpenFile(filename.c_str(), true, true); - art::FileOutputStream fos(file); - fos.WriteFully(result.c_str(), result.size()); - LOG(INFO) << "Written SEA string to file."; -} - void SeaGraph::AddEdge(Region* src, Region* dst) const { src->AddSuccessor(dst); dst->AddPredecessor(src); @@ -191,10 +176,12 @@ void SeaGraph::ComputeReachingDefs() { void SeaGraph::BuildMethodSeaGraph(const art::DexFile::CodeItem* code_item, - const art::DexFile& dex_file, uint32_t class_def_idx, uint32_t method_idx) { + const art::DexFile& dex_file, uint32_t class_def_idx, + uint32_t method_idx, uint32_t method_access_flags) { + code_item_ = code_item; class_def_idx_ = class_def_idx; method_idx_ = method_idx; - + method_access_flags_ = method_access_flags; const uint16_t* code = code_item->insns_; const size_t size_in_code_units = code_item->insns_size_in_code_units_; // This maps target instruction pointers to their corresponding region objects. @@ -225,8 +212,9 @@ void SeaGraph::BuildMethodSeaGraph(const art::DexFile::CodeItem* code_item, // Insert one SignatureNode per function argument, // to serve as placeholder definitions in dataflow analysis. for (unsigned int crt_offset = 0; crt_offset < code_item->ins_size_; crt_offset++) { + int position = crt_offset; // TODO: Is this the correct offset in the signature? SignatureNode* parameter_def_node = - new sea_ir::SignatureNode(code_item->registers_size_ - 1 - crt_offset); + new sea_ir::SignatureNode(code_item->registers_size_ - 1 - crt_offset, position); AddParameterNode(parameter_def_node); r->AddChild(parameter_def_node); } @@ -260,12 +248,8 @@ void SeaGraph::BuildMethodSeaGraph(const art::DexFile::CodeItem* code_item, } r = nextRegion; } - bool definesRegister = (0 != InstructionTools::instruction_attributes_[inst->Opcode()] - && (1 << kDA)); - LOG(INFO)<< inst->GetDexPc(code) << "*** " << inst->DumpString(&dex_file) - << " region:" <<r->StringId() << "Definition?" 
<< definesRegister << std::endl; r->AddChild(node); - } + } i += inst->SizeInCodeUnits(); } } @@ -417,10 +401,10 @@ void SeaGraph::GenerateLLVM() { code_gen_postpass_visitor.Write(std::string("my_file.llvm")); } -void SeaGraph::CompileMethod(const art::DexFile::CodeItem* code_item, - uint32_t class_def_idx, uint32_t method_idx, const art::DexFile& dex_file) { +void SeaGraph::CompileMethod(const art::DexFile::CodeItem* code_item, uint32_t class_def_idx, + uint32_t method_idx, uint32_t method_access_flags, const art::DexFile& dex_file) { // Two passes: Builds the intermediate structure (non-SSA) of the sea-ir for the function. - BuildMethodSeaGraph(code_item, dex_file, class_def_idx, method_idx); + BuildMethodSeaGraph(code_item, dex_file, class_def_idx, method_idx, method_access_flags); // Pass: Compute reverse post-order of regions. ComputeRPO(); // Multiple passes: compute immediate dominators. @@ -433,6 +417,8 @@ void SeaGraph::CompileMethod(const art::DexFile::CodeItem* code_item, ComputeDominanceFrontier(); // Two Passes: Phi node insertion. ConvertToSSA(); + // Pass: type inference + ti_->ComputeTypes(this); // Pass: Generate LLVM IR. GenerateLLVM(); } @@ -465,18 +451,10 @@ void SeaGraph::AddRegion(Region* r) { regions_.push_back(r); } -/* -void SeaNode::AddSuccessor(Region* successor) { - DCHECK(successor) << "Tried to add NULL successor to SEA node."; - successors_.push_back(successor); - return; -} +SeaGraph::SeaGraph(const art::DexFile& df) + :ti_(new TypeInference()), class_def_idx_(0), method_idx_(0), method_access_flags_(), + regions_(), parameters_(), dex_file_(df), code_item_(NULL) { } -void SeaNode::AddPredecessor(Region* predecessor) { - DCHECK(predecessor) << "Tried to add NULL predecessor to SEA node."; - predecessors_.push_back(predecessor); -} -*/ void Region::AddChild(sea_ir::InstructionNode* instruction) { DCHECK(instruction) << "Tried to add NULL instruction to region node."; instructions_.push_back(instruction); @@ -490,46 +468,6 @@ SeaNode* Region::GetLastChild() const { return NULL; } -void Region::ToDot(std::string& result, const art::DexFile& dex_file) const { - result += "\n// Region: \nsubgraph " + StringId() + " { label=\"region " + StringId() + "(rpo="; - result += art::StringPrintf("%d", rpo_number_); - if (NULL != GetIDominator()) { - result += " dom=" + GetIDominator()->StringId(); - } - result += ")\";\n"; - - for (std::vector<PhiInstructionNode*>::const_iterator cit = phi_instructions_.begin(); - cit != phi_instructions_.end(); cit++) { - result += (*cit)->StringId() +";\n"; - } - - for (std::vector<InstructionNode*>::const_iterator cit = instructions_.begin(); - cit != instructions_.end(); cit++) { - result += (*cit)->StringId() +";\n"; - } - - result += "} // End Region.\n"; - - // Save phi-nodes. - for (std::vector<PhiInstructionNode*>::const_iterator cit = phi_instructions_.begin(); - cit != phi_instructions_.end(); cit++) { - (*cit)->ToDot(result, dex_file); - } - - // Save instruction nodes. 
- for (std::vector<InstructionNode*>::const_iterator cit = instructions_.begin(); - cit != instructions_.end(); cit++) { - (*cit)->ToDot(result, dex_file); - } - - for (std::vector<Region*>::const_iterator cit = successors_.begin(); cit != successors_.end(); - cit++) { - DCHECK(NULL != *cit) << "Null successor found for SeaNode" << GetLastChild()->StringId() << "."; - result += GetLastChild()->StringId() + " -> " + (*cit)->GetLastChild()->StringId() + - "[lhead=" + (*cit)->StringId() + ", " + "ltail=" + StringId() + "];\n\n"; - } -} - void Region::ComputeDownExposedDefs() { for (std::vector<InstructionNode*>::const_iterator inst_it = instructions_.begin(); inst_it != instructions_.end(); inst_it++) { @@ -692,38 +630,6 @@ std::vector<InstructionNode*> InstructionNode::Create(const art::Instruction* in return sea_instructions; } -void InstructionNode::ToDotSSAEdges(std::string& result) const { - // SSA definitions: - for (std::map<int, InstructionNode*>::const_iterator def_it = definition_edges_.begin(); - def_it != definition_edges_.end(); def_it++) { - if (NULL != def_it->second) { - result += def_it->second->StringId() + " -> " + StringId() + "[color=gray,label=\""; - result += art::StringPrintf("vR = %d", def_it->first); - result += "\"] ; // ssa edge\n"; - } - } - - // SSA used-by: - if (DotConversion::SaveUseEdges()) { - for (std::vector<InstructionNode*>::const_iterator cit = used_in_.begin(); - cit != used_in_.end(); cit++) { - result += (*cit)->StringId() + " -> " + StringId() + "[color=gray,label=\""; - result += "\"] ; // SSA used-by edge\n"; - } - } -} - -void InstructionNode::ToDot(std::string& result, const art::DexFile& dex_file) const { - result += "// Instruction ("+StringId()+"): \n" + StringId() + - " [label=\"" + instruction_->DumpString(&dex_file) + "\""; - if (de_def_) { - result += "style=bold"; - } - result += "];\n"; - - ToDotSSAEdges(result); -} - void InstructionNode::MarkAsDEDef() { de_def_ = true; } @@ -747,7 +653,7 @@ std::vector<int> InstructionNode::GetDefinitions() const { return definitions; } -std::vector<int> InstructionNode::GetUses() { +std::vector<int> InstructionNode::GetUses() const { std::vector<int> uses; // Using vector<> instead of set<> because order matters. if (!InstructionTools::IsDefinition(instruction_) && (instruction_->HasVRegA())) { int vA = instruction_->VRegA(); @@ -763,13 +669,4 @@ std::vector<int> InstructionNode::GetUses() { } return uses; } - -void PhiInstructionNode::ToDot(std::string& result, const art::DexFile& dex_file) const { - result += "// PhiInstruction: \n" + StringId() + - " [label=\"" + "PHI("; - result += art::StringPrintf("%d", register_no_); - result += ")\""; - result += "];\n"; - ToDotSSAEdges(result); -} } // namespace sea_ir diff --git a/compiler/sea_ir/sea.h b/compiler/sea_ir/ir/sea.h index 5cb84240ae..0b20ed7d74 100644 --- a/compiler/sea_ir/sea.h +++ b/compiler/sea_ir/ir/sea.h @@ -15,17 +15,18 @@ */ -#ifndef ART_COMPILER_SEA_IR_SEA_H_ -#define ART_COMPILER_SEA_IR_SEA_H_ +#ifndef ART_COMPILER_SEA_IR_IR_SEA_H_ +#define ART_COMPILER_SEA_IR_IR_SEA_H_ #include <set> #include <map> +#include "utils/scoped_hashtable.h" +#include "gtest/gtest_prod.h" #include "dex_file.h" #include "dex_instruction.h" -#include "instruction_tools.h" -#include "instruction_nodes.h" -#include "utils/scoped_hashtable.h" +#include "sea_ir/ir/instruction_tools.h" +#include "sea_ir/ir/instruction_nodes.h" namespace sea_ir { @@ -35,19 +36,9 @@ enum RegionNumbering { VISITING = -2 }; -// Stores options for turning a SEA IR graph to a .dot file. 
-class DotConversion { - public: - static bool SaveUseEdges() { - return save_use_edges_; - } - - private: - static const bool save_use_edges_ = false; // TODO: Enable per-sea graph configuration. -}; +class TypeInference; class Region; - class InstructionNode; class PhiInstructionNode; class SignatureNode; @@ -57,21 +48,20 @@ class SignatureNode; // can return from the GetSSAUses() calls, instead of having missing SSA edges. class SignatureNode: public InstructionNode { public: - explicit SignatureNode(unsigned int parameter_register):InstructionNode(NULL), - parameter_register_(parameter_register) { } - - void ToDot(std::string& result, const art::DexFile& dex_file) const { - result += StringId() +" [label=\"signature:"; - result += art::StringPrintf("r%d", GetResultRegister()); - result += "\"] // signature node\n"; - ToDotSSAEdges(result); - } + // Creates a new signature node representing the initial definition of the + // register @parameter_register which is the @position-th argument to the method. + explicit SignatureNode(unsigned int parameter_register, unsigned int position): + InstructionNode(NULL), parameter_register_(parameter_register), position_(position) { } int GetResultRegister() const { return parameter_register_; } - std::vector<int> GetUses() { + unsigned int GetPositionInSignature() const { + return position_; + } + + std::vector<int> GetUses() const { return std::vector<int>(); } @@ -81,15 +71,15 @@ class SignatureNode: public InstructionNode { } private: - unsigned int parameter_register_; + const unsigned int parameter_register_; + const unsigned int position_; // The position of this parameter node is + // in the function parameter list. }; class PhiInstructionNode: public InstructionNode { public: explicit PhiInstructionNode(int register_no): InstructionNode(NULL), register_no_(register_no), definition_edges_() {} - // Appends to @result the .dot string representation of the instruction. - void ToDot(std::string& result, const art::DexFile& dex_file) const; // Returns the register on which this phi-function is used. int GetRegisterNumber() const { return register_no_; @@ -113,6 +103,17 @@ class PhiInstructionNode: public InstructionNode { definition->AddSSAUse(this); } + // Returns the ordered set of Instructions that define the input operands of this instruction. + // Precondition: SeaGraph.ConvertToSSA(). + std::vector<InstructionNode*> GetSSAProducers() { + std::vector<InstructionNode*> producers; + for (std::vector<std::vector<InstructionNode*>*>::const_iterator + cit = definition_edges_.begin(); cit != definition_edges_.end(); cit++) { + producers.insert(producers.end(), (*cit)->begin(), (*cit)->end()); + } + return producers; + } + // Returns the instruction that defines the phi register from predecessor // on position @predecessor_pos. Note that the return value is vector<> just // for consistency with the return value of GetSSAUses() on regular instructions, @@ -128,6 +129,9 @@ class PhiInstructionNode: public InstructionNode { private: int register_no_; + // This vector has one entry for each predecessors, each with a single + // element, storing the id of the instruction that defines the register + // corresponding to this phi function. 
std::vector<std::vector<InstructionNode*>*> definition_edges_; }; @@ -150,10 +154,7 @@ class Region : public SeaNode { std::vector<InstructionNode*>* GetInstructions() { return &instructions_; } - // Appends to @result a dot language formatted string representing the node and - // (by convention) outgoing edges, so that the composition of theToDot() of all nodes - // builds a complete dot graph (without prolog and epilog though). - virtual void ToDot(std::string& result, const art::DexFile& dex_file) const; + // Computes Downward Exposed Definitions for the current node. void ComputeDownExposedDefs(); const std::map<int, sea_ir::InstructionNode*>* GetDownExposedDefs() const; @@ -257,16 +258,14 @@ class Region : public SeaNode { // and acts as starting point for visitors (ex: during code generation). class SeaGraph: IVisitable { public: - static SeaGraph* GetCurrentGraph(const art::DexFile&); + static SeaGraph* GetGraph(const art::DexFile&); - void CompileMethod(const art::DexFile::CodeItem* code_item, - uint32_t class_def_idx, uint32_t method_idx, const art::DexFile& dex_file); + void CompileMethod(const art::DexFile::CodeItem* code_item, uint32_t class_def_idx, + uint32_t method_idx, uint32_t method_access_flags, const art::DexFile& dex_file); // Returns all regions corresponding to this SeaGraph. std::vector<Region*>* GetRegions() { return ®ions_; } - // Returns a string representation of the region and its Instruction children. - void DumpSea(std::string filename) const; // Recursively computes the reverse postorder value for @crt_bb and successors. static void ComputeRPO(Region* crt_bb, int& crt_rpo); // Returns the "lowest common ancestor" of @i and @j in the dominator tree. @@ -275,13 +274,28 @@ class SeaGraph: IVisitable { std::vector<SignatureNode*>* GetParameterNodes() { return ¶meters_; } + + const art::DexFile* GetDexFile() const { + return &dex_file_; + } + + virtual void Accept(IRVisitor* visitor) { + visitor->Initialize(this); + visitor->Visit(this); + visitor->Traverse(this); + } + + TypeInference* ti_; uint32_t class_def_idx_; uint32_t method_idx_; + uint32_t method_access_flags_; + + protected: + explicit SeaGraph(const art::DexFile& df); + virtual ~SeaGraph() { } private: - explicit SeaGraph(const art::DexFile& df): - class_def_idx_(0), method_idx_(0), regions_(), parameters_(), dex_file_(df) { - } + FRIEND_TEST(RegionsTest, Basics); // Registers @childReg as a region belonging to the SeaGraph instance. void AddRegion(Region* childReg); // Returns new region and registers it with the SeaGraph instance. @@ -295,7 +309,8 @@ class SeaGraph: IVisitable { // Builds the non-SSA sea-ir representation of the function @code_item from @dex_file // with class id @class_def_idx and method id @method_idx. void BuildMethodSeaGraph(const art::DexFile::CodeItem* code_item, - const art::DexFile& dex_file, uint32_t class_def_idx, uint32_t method_idx); + const art::DexFile& dex_file, uint32_t class_def_idx, + uint32_t method_idx, uint32_t method_access_flags); // Computes immediate dominators for each region. // Precondition: ComputeMethodSeaGraph() void ComputeIDominators(); @@ -320,14 +335,6 @@ class SeaGraph: IVisitable { // Identifies the definitions corresponding to uses for region @node // by using the scoped hashtable of names @ scoped_table. 
void RenameAsSSA(Region* node, utils::ScopedHashtable<int, InstructionNode*>* scoped_table); - - virtual void Accept(IRVisitor* visitor) { - visitor->Initialize(this); - visitor->Visit(this); - visitor->Traverse(this); - } - - virtual ~SeaGraph() {} // Generate LLVM IR for the method. // Precondition: ConvertToSSA(). void GenerateLLVM(); @@ -336,6 +343,7 @@ class SeaGraph: IVisitable { std::vector<Region*> regions_; std::vector<SignatureNode*> parameters_; const art::DexFile& dex_file_; + const art::DexFile::CodeItem* code_item_; }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_SEA_H_ +#endif // ART_COMPILER_SEA_IR_IR_SEA_H_ diff --git a/compiler/sea_ir/sea_node.h b/compiler/sea_ir/ir/sea_node.h index c13e5d6aba..4dab5cba83 100644 --- a/compiler/sea_ir/sea_node.h +++ b/compiler/sea_ir/ir/sea_node.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef ART_COMPILER_SEA_IR_SEA_NODE_H_ -#define ART_COMPILER_SEA_IR_SEA_NODE_H_ +#ifndef ART_COMPILER_SEA_IR_IR_SEA_NODE_H_ +#define ART_COMPILER_SEA_IR_IR_SEA_NODE_H_ #include "base/stringprintf.h" @@ -56,10 +56,6 @@ class SeaNode: public IVisitable { int Id() const { return id_; } - // Appends to @result a dot language formatted string representing the node and - // (by convention) outgoing edges, so that the composition of theToDot() of all nodes - // builds a complete dot graph, but without prolog ("digraph {") and epilog ("}"). - virtual void ToDot(std::string& result, const art::DexFile& dex_file) const = 0; virtual ~SeaNode() { } @@ -78,4 +74,4 @@ class SeaNode: public IVisitable { DISALLOW_COPY_AND_ASSIGN(SeaNode); }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_SEA_NODE_H_ +#endif // ART_COMPILER_SEA_IR_IR_SEA_NODE_H_ diff --git a/compiler/sea_ir/visitor.h b/compiler/sea_ir/ir/visitor.h index a4fec7b8a9..cc7b5d153f 100644 --- a/compiler/sea_ir/visitor.h +++ b/compiler/sea_ir/ir/visitor.h @@ -14,16 +14,8 @@ * limitations under the License. */ -#ifndef ART_COMPILER_SEA_IR_VISITOR_H_ -#define ART_COMPILER_SEA_IR_VISITOR_H_ - -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Analysis/Verifier.h" -// TODO: Separating the root visitor from the code_gen visitor -// would allow me to not include llvm headers here. 
- +#ifndef ART_COMPILER_SEA_IR_IR_VISITOR_H_ +#define ART_COMPILER_SEA_IR_IR_VISITOR_H_ namespace sea_ir { @@ -32,6 +24,7 @@ class Region; class InstructionNode; class PhiInstructionNode; class SignatureNode; +class UnnamedConstInstructionNode; class ConstInstructionNode; class ReturnInstructionNode; class IfNeInstructionNode; @@ -48,7 +41,7 @@ class IfEqzInstructionNode; class IRVisitor { public: - explicit IRVisitor():ordered_regions_() { } + explicit IRVisitor(): ordered_regions_() { } virtual void Initialize(SeaGraph* graph) = 0; virtual void Visit(SeaGraph* graph) = 0; virtual void Visit(Region* region) = 0; @@ -57,16 +50,16 @@ class IRVisitor { virtual void Visit(InstructionNode* region) = 0; virtual void Visit(ConstInstructionNode* instruction) = 0; + virtual void Visit(UnnamedConstInstructionNode* instruction) = 0; virtual void Visit(ReturnInstructionNode* instruction) = 0; virtual void Visit(IfNeInstructionNode* instruction) = 0; - // virtual void Visit(AddIntLitInstructionNode* instruction) = 0; virtual void Visit(MoveResultInstructionNode* instruction) = 0; virtual void Visit(InvokeStaticInstructionNode* instruction) = 0; virtual void Visit(AddIntInstructionNode* instruction) = 0; virtual void Visit(GotoInstructionNode* instruction) = 0; virtual void Visit(IfEqzInstructionNode* instruction) = 0; - // Note: This favor of visitor separates the traversal functions from the actual visiting part + // Note: This flavor of visitor separates the traversal functions from the actual visiting part // so that the Visitor subclasses don't duplicate code and can't get the traversal wrong. // The disadvantage is the increased number of functions (and calls). virtual void Traverse(SeaGraph* graph); @@ -91,4 +84,4 @@ class IRVisitor { std::vector<Region*> ordered_regions_; }; } // namespace sea_ir -#endif // ART_COMPILER_SEA_IR_VISITOR_H_ +#endif // ART_COMPILER_SEA_IR_IR_VISITOR_H_ diff --git a/compiler/sea_ir/types/type_data_test.cc b/compiler/sea_ir/types/type_data_test.cc new file mode 100644 index 0000000000..a66ebce38f --- /dev/null +++ b/compiler/sea_ir/types/type_data_test.cc @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_test.h" +#include "sea_ir/types/types.h" + +namespace sea_ir { + +class TypeDataTest : public art::CommonTest { +}; + +TEST_F(TypeDataTest, Basics) { + TypeData td; + art::verifier::RegTypeCache type_cache(false); + int first_instruction_id = 1; + int second_instruction_id = 3; + EXPECT_TRUE(NULL == td.FindTypeOf(first_instruction_id)); + const Type* int_type = &type_cache.Integer(); + const Type* byte_type = &type_cache.Byte(); + td.SetTypeOf(first_instruction_id, int_type); + EXPECT_TRUE(int_type == td.FindTypeOf(first_instruction_id)); + EXPECT_TRUE(NULL == td.FindTypeOf(second_instruction_id)); + td.SetTypeOf(second_instruction_id, byte_type); + EXPECT_TRUE(int_type == td.FindTypeOf(first_instruction_id)); + EXPECT_TRUE(byte_type == td.FindTypeOf(second_instruction_id)); +} + +} // namespace art diff --git a/compiler/sea_ir/types/type_inference.cc b/compiler/sea_ir/types/type_inference.cc new file mode 100644 index 0000000000..31d7f0f8b6 --- /dev/null +++ b/compiler/sea_ir/types/type_inference.cc @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "scoped_thread_state_change.h" +#include "sea_ir/types/type_inference.h" +#include "sea_ir/types/type_inference_visitor.h" +#include "sea_ir/ir/sea.h" + +namespace sea_ir { + +bool TypeInference::IsPrimitiveDescriptor(char descriptor) { + switch (descriptor) { + case 'I': + case 'C': + case 'S': + case 'B': + case 'Z': + case 'F': + case 'D': + case 'J': + return true; + default: + return false; + } +} + +FunctionTypeInfo::FunctionTypeInfo(const SeaGraph* graph, art::verifier::RegTypeCache* types) + : dex_file_(graph->GetDexFile()), dex_method_idx_(graph->method_idx_), type_cache_(types), + method_access_flags_(graph->method_access_flags_) { + const art::DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); + const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(method_id.class_idx_)); + declaring_class_ = &(type_cache_->FromDescriptor(NULL, descriptor, false)); +} + +FunctionTypeInfo::FunctionTypeInfo(const SeaGraph* graph, InstructionNode* inst, + art::verifier::RegTypeCache* types): dex_file_(graph->GetDexFile()), + dex_method_idx_(inst->GetInstruction()->VRegB_35c()), type_cache_(types), + method_access_flags_(0) { + // TODO: Test that GetDeclaredArgumentTypes() works correctly when using this constructor. 
+ const art::DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); + const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(method_id.class_idx_)); + declaring_class_ = &(type_cache_->FromDescriptor(NULL, descriptor, false)); +} + +const Type* FunctionTypeInfo::GetReturnValueType() { + const art::DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); + uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_; + const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx); + art::ScopedObjectAccess soa(art::Thread::Current()); + const Type& return_type = type_cache_->FromDescriptor(NULL, descriptor, false); + return &return_type; +} + + + +std::vector<const Type*> FunctionTypeInfo::GetDeclaredArgumentTypes() { + art::ScopedObjectAccess soa(art::Thread::Current()); + std::vector<const Type*> argument_types; + // Include the "this" pointer. + size_t cur_arg = 0; + if (!IsStatic()) { + // If this is a constructor for a class other than java.lang.Object, mark the first ("this") + // argument as uninitialized. This restricts field access until the superclass constructor is + // called. + const art::verifier::RegType& declaring_class = GetDeclaringClass(); + if (IsConstructor() && !declaring_class.IsJavaLangObject()) { + argument_types.push_back(&(type_cache_->UninitializedThisArgument(declaring_class))); + } else { + argument_types.push_back(&declaring_class); + } + cur_arg++; + } + + const art::DexFile::ProtoId& proto_id = + dex_file_->GetMethodPrototype(dex_file_->GetMethodId(dex_method_idx_)); + art::DexFileParameterIterator iterator(*dex_file_, proto_id); + + for (; iterator.HasNext(); iterator.Next()) { + const char* descriptor = iterator.GetDescriptor(); + if (descriptor == NULL) { + LOG(FATAL) << "Error: Encountered null type descriptor for function argument."; + } + switch (descriptor[0]) { + case 'L': + case '[': + // We assume that reference arguments are initialized. The only way it could be otherwise + // (assuming the caller was verified) is if the current method is <init>, but in that case + // it's effectively considered initialized the instant we reach here (in the sense that we + // can return without doing anything or call virtual methods). + { + const Type& reg_type = type_cache_->FromDescriptor(NULL, descriptor, false); + argument_types.push_back(®_type); + } + break; + case 'Z': + argument_types.push_back(&type_cache_->Boolean()); + break; + case 'C': + argument_types.push_back(&type_cache_->Char()); + break; + case 'B': + argument_types.push_back(&type_cache_->Byte()); + break; + case 'I': + argument_types.push_back(&type_cache_->Integer()); + break; + case 'S': + argument_types.push_back(&type_cache_->Short()); + break; + case 'F': + argument_types.push_back(&type_cache_->Float()); + break; + case 'J': + case 'D': { + // TODO: Figure out strategy for two-register operands (double, long) + LOG(FATAL) << "Error: Type inference for 64-bit variables has not been implemented."; + break; + } + default: + LOG(FATAL) << "Error: Unexpected signature encountered during type inference."; + } + cur_arg++; + } + return argument_types; +} + +// TODO: Lock is only used for dumping types (during development). Remove this for performance. +void TypeInference::ComputeTypes(SeaGraph* graph) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + std::vector<Region*>* regions = graph->GetRegions(); + std::list<InstructionNode*> worklist; + // Fill the work-list with all instructions. 
+ for (std::vector<Region*>::const_iterator region_it = regions->begin(); + region_it != regions->end(); region_it++) { + std::vector<PhiInstructionNode*>* phi_instructions = (*region_it)->GetPhiNodes(); + std::copy(phi_instructions->begin(), phi_instructions->end(), std::back_inserter(worklist)); + std::vector<InstructionNode*>* instructions = (*region_it)->GetInstructions(); + std::copy(instructions->begin(), instructions->end(), std::back_inserter(worklist)); + } + TypeInferenceVisitor tiv(graph, &type_data_, type_cache_); + // Sparse (SSA) fixed-point algorithm that processes each instruction in the work-list, + // adding consumers of instructions whose result changed type back into the work-list. + // Note: According to [1] list iterators should not be invalidated on insertion, + // which simplifies the implementation; not 100% sure other STL implementations + // maintain this invariant, but they should. + // [1] http://www.sgi.com/tech/stl/List.html + // TODO: Making this conditional (as in sparse conditional constant propagation) would be good. + // TODO: Remove elements as I go. + for (std::list<InstructionNode*>::const_iterator instruction_it = worklist.begin(); + instruction_it != worklist.end(); instruction_it++) { + std::cout << "[TI] Instruction: " << (*instruction_it)->Id() << std::endl; + (*instruction_it)->Accept(&tiv); + const Type* old_type = type_data_.FindTypeOf((*instruction_it)->Id()); + const Type* new_type = tiv.GetType(); + bool type_changed = (old_type != new_type); + if (type_changed) { + std::cout << " New type:" << new_type->IsIntegralTypes() << std::endl; + std::cout << " Descrip:" << new_type->Dump()<< " on " << (*instruction_it)->Id() << std::endl; + type_data_.SetTypeOf((*instruction_it)->Id(), new_type); + // Add SSA consumers of the current instruction to the work-list. + std::vector<InstructionNode*>* consumers = (*instruction_it)->GetSSAConsumers(); + for (std::vector<InstructionNode*>::iterator consumer = consumers->begin(); + consumer != consumers->end(); consumer++) { + worklist.push_back(*consumer); + } + } + } +} +} // namespace sea_ir diff --git a/compiler/sea_ir/types/type_inference.h b/compiler/sea_ir/types/type_inference.h new file mode 100644 index 0000000000..d951d82071 --- /dev/null +++ b/compiler/sea_ir/types/type_inference.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_H_ +#define ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_H_ + +#include "safe_map.h" +#include "dex_file-inl.h" +#include "sea_ir/types/types.h" + +namespace sea_ir { + +class SeaGraph; +class InstructionNode; + +// The type inference in SEA IR is different from the verifier in that it is concerned +// with a rich type hierarchy (TODO) usable in optimization and does not perform +// precise verification (which is the job of the verifier). 
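ComputeTypes above is a sparse worklist fixed-point: every instruction is visited at least once, and whenever an instruction's inferred type changes, its SSA consumers are pushed back onto the work-list so the change propagates through the graph. A minimal standalone sketch of that pattern follows; Node, Propagate and the integer type ids are illustrative stand-ins, not the SEA IR classes.

    #include <list>
    #include <map>
    #include <vector>

    struct Node {
      int id;
      std::vector<Node*> consumers;  // SSA consumers of this node's value.
    };

    // Re-evaluate nodes until no inferred type changes. 'evaluate' plays the role of
    // TypeInferenceVisitor: it computes a node's type from the current assignment.
    void Propagate(std::list<Node*> worklist, std::map<int, int>* type_of,
                   int (*evaluate)(const Node&, const std::map<int, int>&)) {
      while (!worklist.empty()) {
        Node* node = worklist.front();
        worklist.pop_front();
        const int new_type = evaluate(*node, *type_of);
        std::map<int, int>::const_iterator old_it = type_of->find(node->id);
        const bool changed = (old_it == type_of->end()) || (old_it->second != new_type);
        if (changed) {
          (*type_of)[node->id] = new_type;
          // Only consumers of a changed node are revisited; unaffected nodes stay settled.
          for (size_t i = 0; i < node->consumers.size(); ++i) {
            worklist.push_back(node->consumers[i]);
          }
        }
      }
    }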
+class TypeInference { + public: + TypeInference() { + type_cache_ = new art::verifier::RegTypeCache(false); + } + + // Computes the types for the method with SEA IR representation provided by @graph. + void ComputeTypes(SeaGraph* graph); + + art::SafeMap<int, const Type*>* GetTypeMap() { + return type_data_.GetTypeMap(); + } + // Returns true if @descriptor corresponds to a primitive type. + static bool IsPrimitiveDescriptor(char descriptor); + + protected: + art::verifier::RegTypeCache* type_cache_; + TypeData type_data_; +}; + +// Stores information about the exact type of a function. +class FunctionTypeInfo { + public: + // Finds method information about the method encoded by a SEA IR graph. + // @graph provides the input method SEA IR representation. + // @types provides the input cache of types from which the + // parameter types of the function are found. + FunctionTypeInfo(const SeaGraph* graph, art::verifier::RegTypeCache* types); + // Finds method information about the method encoded by + // an invocation instruction in a SEA IR graph. + // @graph provides the input method SEA IR representation. + // @inst is an invocation instruction for the desired method. + // @types provides the input cache of types from which the + // parameter types of the function are found. + FunctionTypeInfo(const SeaGraph* graph, InstructionNode* inst, + art::verifier::RegTypeCache* types); + // Returns the ordered vector of types corresponding to the function arguments. + std::vector<const Type*> GetDeclaredArgumentTypes(); + // Returns the declared return value type. + const Type* GetReturnValueType(); + // Returns the type corresponding to the class that declared the method. + const Type& GetDeclaringClass() { + return *declaring_class_; + } + + bool IsConstructor() const { + return (method_access_flags_ & kAccConstructor) != 0; + } + + bool IsStatic() const { + return (method_access_flags_ & kAccStatic) != 0; + } + + protected: + const Type* declaring_class_; + const art::DexFile* dex_file_; + const uint32_t dex_method_idx_; + art::verifier::RegTypeCache* type_cache_; + const uint32_t method_access_flags_; // Method's access flags. +}; +} // namespace sea_ir + +#endif // ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_H_ diff --git a/compiler/sea_ir/types/type_inference_visitor.cc b/compiler/sea_ir/types/type_inference_visitor.cc new file mode 100644 index 0000000000..3da2fc1e2c --- /dev/null +++ b/compiler/sea_ir/types/type_inference_visitor.cc @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "scoped_thread_state_change.h" +#include "sea_ir/types/type_inference_visitor.h" +#include "sea_ir/types/type_inference.h" +#include "sea_ir/ir/sea.h" + +namespace sea_ir { + +void TypeInferenceVisitor::Visit(SignatureNode* parameter) { + FunctionTypeInfo fti(graph_, type_cache_); + std::vector<const Type*> arguments = fti.GetDeclaredArgumentTypes(); + DCHECK_LT(parameter->GetPositionInSignature(), arguments.size()) + << "Signature node position not present in signature."; + crt_type_.push_back(arguments.at(parameter->GetPositionInSignature())); +} + +void TypeInferenceVisitor::Visit(UnnamedConstInstructionNode* instruction) { + crt_type_.push_back(&type_cache_->Integer()); +} + +void TypeInferenceVisitor::Visit(PhiInstructionNode* instruction) { + std::vector<const Type*> types_to_merge = GetOperandTypes(instruction); + const Type* result_type = MergeTypes(types_to_merge); + crt_type_.push_back(result_type); +} + +void TypeInferenceVisitor::Visit(AddIntInstructionNode* instruction) { + std::vector<const Type*> operand_types = GetOperandTypes(instruction); + for (std::vector<const Type*>::const_iterator cit = operand_types.begin(); + cit != operand_types.end(); cit++) { + if (*cit != NULL) { + DCHECK((*cit)->IsInteger()); + } + } + crt_type_.push_back(&type_cache_->Integer()); +} + +void TypeInferenceVisitor::Visit(MoveResultInstructionNode* instruction) { + std::vector<const Type*> operand_types = GetOperandTypes(instruction); + const Type* operand_type = operand_types.at(0); + crt_type_.push_back(operand_type); +} + +void TypeInferenceVisitor::Visit(InvokeStaticInstructionNode* instruction) { + FunctionTypeInfo fti(graph_, instruction, type_cache_); + const Type* result_type = fti.GetReturnValueType(); + crt_type_.push_back(result_type); +} + +std::vector<const Type*> TypeInferenceVisitor::GetOperandTypes( + InstructionNode* instruction) const { + std::vector<InstructionNode*> sources = instruction->GetSSAProducers(); + std::vector<const Type*> types_to_merge; + for (std::vector<InstructionNode*>::const_iterator cit = sources.begin(); cit != sources.end(); + cit++) { + const Type* source_type = type_data_->FindTypeOf((*cit)->Id()); + if (source_type != NULL) { + types_to_merge.push_back(source_type); + } + } + return types_to_merge; +} + +const Type* TypeInferenceVisitor::MergeTypes(std::vector<const Type*>& types) const { + const Type* type = NULL; + if (types.size()>0) { + type = *(types.begin()); + if (types.size()>1) { + for (std::vector<const Type*>::const_iterator cit = types.begin(); + cit != types.end(); cit++) { + if (!type->Equals(**cit)) { + type = MergeTypes(type, *cit); + } + } + } + } + return type; +} + +const Type* TypeInferenceVisitor::MergeTypes(const Type* t1, const Type* t2) const { + DCHECK(t2 != NULL); + DCHECK(t1 != NULL); + art::ScopedObjectAccess soa(art::Thread::Current()); + const Type* result = &(t1->Merge(*t2, type_cache_)); + return result; +} + +} // namespace sea_ir diff --git a/compiler/sea_ir/types/type_inference_visitor.h b/compiler/sea_ir/types/type_inference_visitor.h new file mode 100644 index 0000000000..200b9f0253 --- /dev/null +++ b/compiler/sea_ir/types/type_inference_visitor.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_VISITOR_H_ +#define ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_VISITOR_H_ + + +#include "dex_file-inl.h" +#include "sea_ir/ir/visitor.h" +#include "sea_ir/types/types.h" + +namespace sea_ir { + +// The TypeInferenceVisitor visits each instruction and computes its type taking into account +// the current type of the operands. The type is stored in the visitor. +// We may be better off by using a separate visitor type hierarchy that has return values +// or that passes data as parameters, than to use fields to store information that should +// in fact be returned after visiting each element. Ideally, I would prefer to use templates +// to specify the returned value type, but I am not aware of a possible implementation +// that does not horribly duplicate the visitor infrastructure code (version 1: no return value, +// version 2: with template return value). +class TypeInferenceVisitor: public IRVisitor { + public: + TypeInferenceVisitor(SeaGraph* graph, TypeData* type_data, + art::verifier::RegTypeCache* types): + graph_(graph), type_data_(type_data), type_cache_(types), crt_type_() { + } + // There are no type related actions to be performed on these classes. + void Initialize(SeaGraph* graph) { } + void Visit(SeaGraph* graph) { } + void Visit(Region* region) { } + + void Visit(PhiInstructionNode* instruction); + void Visit(SignatureNode* parameter); + void Visit(InstructionNode* instruction) { } + void Visit(UnnamedConstInstructionNode* instruction); + void Visit(ConstInstructionNode* instruction) { } + void Visit(ReturnInstructionNode* instruction) { } + void Visit(IfNeInstructionNode* instruction) { } + void Visit(MoveResultInstructionNode* instruction); + void Visit(InvokeStaticInstructionNode* instruction); + void Visit(AddIntInstructionNode* instruction); + void Visit(GotoInstructionNode* instruction) { } + void Visit(IfEqzInstructionNode* instruction) { } + + const Type* MergeTypes(std::vector<const Type*>& types) const; + const Type* MergeTypes(const Type* t1, const Type* t2) const; + std::vector<const Type*> GetOperandTypes(InstructionNode* instruction) const; + const Type* GetType() { + // TODO: Currently multiple defined types are not supported. + if (crt_type_.size()>0) { + const Type* single_type = crt_type_.at(0); + crt_type_.clear(); + return single_type; + } + return NULL; + } + + protected: + const SeaGraph* const graph_; + TypeData* type_data_; + art::verifier::RegTypeCache* type_cache_; + std::vector<const Type*> crt_type_; // Stored temporarily between two calls to Visit. 
+}; + +} // namespace sea_ir + +#endif // ART_COMPILER_SEA_IR_TYPES_TYPE_INFERENCE_VISITOR_H_ diff --git a/compiler/sea_ir/types/type_inference_visitor_test.cc b/compiler/sea_ir/types/type_inference_visitor_test.cc new file mode 100644 index 0000000000..8a249ebceb --- /dev/null +++ b/compiler/sea_ir/types/type_inference_visitor_test.cc @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common_test.h" +#include "sea_ir/types/type_inference_visitor.h" +#include "sea_ir/ir/sea.h" + +namespace sea_ir { + +class TestInstructionNode:public InstructionNode { + public: + explicit TestInstructionNode(std::vector<InstructionNode*> prods): InstructionNode(NULL), + producers_(prods) { } + std::vector<InstructionNode*> GetSSAProducers() { + return producers_; + } + protected: + std::vector<InstructionNode*> producers_; +}; + +class TypeInferenceVisitorTest : public art::CommonTest { +}; + +TEST_F(TypeInferenceVisitorTest, MergeIntWithByte) { + TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + const Type* int_type = &type_cache.Integer(); + const Type* byte_type = &type_cache.Byte(); + const Type* ib_type = tiv.MergeTypes(int_type, byte_type); + const Type* bi_type = tiv.MergeTypes(byte_type, int_type); + EXPECT_TRUE(ib_type == int_type); + EXPECT_TRUE(bi_type == int_type); +} + +TEST_F(TypeInferenceVisitorTest, MergeIntWithShort) { + TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + const Type* int_type = &type_cache.Integer(); + const Type* short_type = &type_cache.Short(); + const Type* is_type = tiv.MergeTypes(int_type, short_type); + const Type* si_type = tiv.MergeTypes(short_type, int_type); + EXPECT_TRUE(is_type == int_type); + EXPECT_TRUE(si_type == int_type); +} + +TEST_F(TypeInferenceVisitorTest, MergeMultipleInts) { + int N = 10; // Number of types to merge. + TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + std::vector<const Type*> types; + for (int i = 0; i < N; i++) { + const Type* new_type = &type_cache.Integer(); + types.push_back(new_type); + } + const Type* merged_type = tiv.MergeTypes(types); + EXPECT_TRUE(merged_type == &type_cache.Integer()); +} + +TEST_F(TypeInferenceVisitorTest, MergeMultipleShorts) { + int N = 10; // Number of types to merge. + TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + std::vector<const Type*> types; + for (int i = 0; i < N; i++) { + const Type* new_type = &type_cache.Short(); + types.push_back(new_type); + } + const Type* merged_type = tiv.MergeTypes(types); + EXPECT_TRUE(merged_type == &type_cache.Short()); +} + +TEST_F(TypeInferenceVisitorTest, MergeMultipleIntsWithShorts) { + int N = 10; // Number of types to merge. 
+ TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + std::vector<const Type*> types; + for (int i = 0; i < N; i++) { + const Type* short_type = &type_cache.Short(); + const Type* int_type = &type_cache.Integer(); + types.push_back(short_type); + types.push_back(int_type); + } + const Type* merged_type = tiv.MergeTypes(types); + EXPECT_TRUE(merged_type == &type_cache.Integer()); +} + +TEST_F(TypeInferenceVisitorTest, GetOperandTypes) { + int N = 10; // Number of types to merge. + TypeData td; + art::verifier::RegTypeCache type_cache(false); + TypeInferenceVisitor tiv(NULL, &td, &type_cache); + std::vector<const Type*> types; + std::vector<InstructionNode*> preds; + for (int i = 0; i < N; i++) { + const Type* short_type = &type_cache.Short(); + const Type* int_type = &type_cache.Integer(); + TestInstructionNode* short_inst = + new TestInstructionNode(std::vector<InstructionNode*>()); + TestInstructionNode* int_inst = + new TestInstructionNode(std::vector<InstructionNode*>()); + preds.push_back(short_inst); + preds.push_back(int_inst); + td.SetTypeOf(short_inst->Id(), short_type); + td.SetTypeOf(int_inst->Id(), int_type); + types.push_back(short_type); + types.push_back(int_type); + } + TestInstructionNode* inst_to_test = new TestInstructionNode(preds); + std::vector<const Type*> result = tiv.GetOperandTypes(inst_to_test); + EXPECT_TRUE(result.size() == types.size()); + EXPECT_TRUE(true == std::equal(types.begin(), types.begin() + 2, result.begin())); +} + + +} // namespace art diff --git a/compiler/sea_ir/types/types.h b/compiler/sea_ir/types/types.h new file mode 100644 index 0000000000..64f25243d0 --- /dev/null +++ b/compiler/sea_ir/types/types.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_SEA_IR_TYPES_TYPES_H_ +#define ART_COMPILER_SEA_IR_TYPES_TYPES_H_ + +#include "safe_map.h" +#include "verifier/reg_type.h" +#include "verifier/reg_type_cache.h" + +namespace sea_ir { + +// TODO: Replace typedef with an actual class implementation when we have more types. +typedef art::verifier::RegType Type; + +// Stores information about the result type of each instruction. +// Note: Main purpose is to encapsulate the map<instruction id, type*>, +// so that we can replace the underlying storage at any time. +class TypeData { + public: + art::SafeMap<int, const Type*>* GetTypeMap() { + return &type_map_; + } + // Returns the type associated with instruction with @instruction_id. + const Type* FindTypeOf(int instruction_id) { + art::SafeMap<int, const Type*>::const_iterator result_it = type_map_.find(instruction_id); + if (type_map_.end() != result_it) { + return result_it->second; + } + return NULL; + } + + // Saves the fact that instruction @instruction_id produces a value of type @type. 
+ void SetTypeOf(int instruction_id, const Type* type) { + type_map_.Overwrite(instruction_id, type); + } + + private: + art::SafeMap<int, const Type*> type_map_; +}; + + + +} // namespace sea_ir +#endif // ART_COMPILER_SEA_IR_TYPES_TYPES_H_ diff --git a/compiler/utils/scoped_hashtable_test.cc b/compiler/utils/scoped_hashtable_test.cc index d5f9f7d035..68608f01d1 100644 --- a/compiler/utils/scoped_hashtable_test.cc +++ b/compiler/utils/scoped_hashtable_test.cc @@ -15,7 +15,7 @@ */ #include "common_test.h" -#include "scoped_hashtable.h" +#include "utils/scoped_hashtable.h" using utils::ScopedHashtable; diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 21ba0d2e04..2f41bcdf15 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -179,7 +179,6 @@ class LOCKABLE Mutex : public BaseMutex { const bool recursive_; // Can the lock be recursively held? unsigned int recursion_count_; friend class ConditionVariable; - friend class MutexTester; DISALLOW_COPY_AND_ASSIGN(Mutex); }; @@ -290,7 +289,6 @@ class LOCKABLE ReaderWriterMutex : public BaseMutex { #else pthread_rwlock_t rwlock_; #endif - friend class MutexTester; DISALLOW_COPY_AND_ASSIGN(ReaderWriterMutex); }; diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc index b58b0ac722..78a6883f0b 100644 --- a/runtime/base/timing_logger.cc +++ b/runtime/base/timing_logger.cc @@ -39,7 +39,7 @@ CumulativeLogger::CumulativeLogger(const std::string& name) } CumulativeLogger::~CumulativeLogger() { - STLDeleteElements(&histograms_); + STLDeleteValues(&histograms_); } void CumulativeLogger::SetName(const std::string& name) { @@ -47,18 +47,17 @@ void CumulativeLogger::SetName(const std::string& name) { } void CumulativeLogger::Start() { - MutexLock mu(Thread::Current(), lock_); - index_ = 0; } void CumulativeLogger::End() { MutexLock mu(Thread::Current(), lock_); iterations_++; } + void CumulativeLogger::Reset() { MutexLock mu(Thread::Current(), lock_); iterations_ = 0; - STLDeleteElements(&histograms_); + STLDeleteValues(&histograms_); } uint64_t CumulativeLogger::GetTotalNs() const { @@ -68,36 +67,19 @@ uint64_t CumulativeLogger::GetTotalNs() const { uint64_t CumulativeLogger::GetTotalTime() const { MutexLock mu(Thread::Current(), lock_); uint64_t total = 0; - for (size_t i = 0; i < histograms_.size(); ++i) { - total += histograms_[i]->Sum(); + for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end(); + it != end; ++it) { + total += it->second->Sum(); } return total; } - void CumulativeLogger::AddLogger(const base::TimingLogger &logger) { MutexLock mu(Thread::Current(), lock_); - const std::vector<std::pair<uint64_t, const char*> >& splits = logger.GetSplits(); - typedef std::vector<std::pair<uint64_t, const char*> >::const_iterator It; - // The first time this is run, the histograms array will be empty. 
- if (kIsDebugBuild && !histograms_.empty() && splits.size() != histograms_.size()) { - LOG(ERROR) << "Mismatch in splits."; - typedef std::vector<Histogram<uint64_t> *>::const_iterator It2; - It it = splits.begin(); - It2 it2 = histograms_.begin(); - while ((it != splits.end()) && (it2 != histograms_.end())) { - if (it != splits.end()) { - LOG(ERROR) << "\tsplit: " << it->second; - ++it; - } - if (it2 != histograms_.end()) { - LOG(ERROR) << "\tpreviously record: " << (*it2)->Name(); - ++it2; - } - } - } - for (It it = splits.begin(), end = splits.end(); it != end; ++it) { - std::pair<uint64_t, const char*> split = *it; + const base::TimingLogger::SplitTimings& splits = logger.GetSplits(); + for (base::TimingLogger::SplitsIterator it = splits.begin(), end = splits.end(); + it != end; ++it) { + base::TimingLogger::SplitTiming split = *it; uint64_t split_time = split.first; const char* split_name = split.second; AddPair(split_name, split_time); @@ -112,23 +94,24 @@ void CumulativeLogger::Dump(std::ostream &os) { void CumulativeLogger::AddPair(const std::string &label, uint64_t delta_time) { // Convert delta time to microseconds so that we don't overflow our counters. delta_time /= kAdjust; - if (histograms_.size() <= index_) { + + if (histograms_.find(label) == histograms_.end()) { + // TODO: Should the 16 and 100 below be named constants so we know where they come from? const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ? 16 : 100; - histograms_.push_back(new Histogram<uint64_t>(label.c_str(), 50, max_buckets)); - DCHECK_GT(histograms_.size(), index_); + // TODO: Should the bucket width of 50 be a named constant as well? + histograms_[label] = new Histogram<uint64_t>(label.c_str(), 50, max_buckets); } - histograms_[index_]->AddValue(delta_time); - DCHECK_EQ(label, histograms_[index_]->Name()); - ++index_; + histograms_[label]->AddValue(delta_time); } void CumulativeLogger::DumpHistogram(std::ostream &os) { os << "Start Dumping histograms for " << iterations_ << " iterations" << " for " << name_ << "\n"; - for (size_t Idx = 0; Idx < histograms_.size(); Idx++) { + for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end(); + it != end; ++it) { Histogram<uint64_t>::CumulativeData cumulative_data; - histograms_[Idx]->CreateHistogram(cumulative_data); - histograms_[Idx]->PrintConfidenceIntervals(os, 0.99, cumulative_data); + it->second->CreateHistogram(cumulative_data); + it->second->PrintConfidenceIntervals(os, 0.99, cumulative_data); // Reset cumulative values to save memory. We don't expect DumpHistogram to be called often, so // it is not performance critical.
} @@ -139,58 +122,42 @@ void CumulativeLogger::DumpHistogram(std::ostream &os) { namespace base { TimingLogger::TimingLogger(const char* name, bool precise, bool verbose) - : name_(name), precise_(precise), verbose_(verbose), - current_split_(NULL), current_split_start_ns_(0) { + : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) { } void TimingLogger::Reset() { current_split_ = NULL; - current_split_start_ns_ = 0; splits_.clear(); } void TimingLogger::StartSplit(const char* new_split_label) { - DCHECK(current_split_ == NULL); - if (verbose_) { - LOG(INFO) << "Begin: " << new_split_label; - } - current_split_ = new_split_label; - ATRACE_BEGIN(current_split_); - current_split_start_ns_ = NanoTime(); + DCHECK(new_split_label != NULL) << "Starting split (" << new_split_label << ") with null label."; + TimingLogger::ScopedSplit* explicit_scoped_split = new TimingLogger::ScopedSplit(new_split_label, this); + explicit_scoped_split->explicit_ = true; +} + +void TimingLogger::EndSplit() { + CHECK(current_split_ != NULL) << "Ending a non-existent split."; + DCHECK(current_split_->label_ != NULL); + DCHECK(current_split_->explicit_ == true) << "Explicitly ending scoped split: " << current_split_->label_; + + delete current_split_; } // Ends the current split and starts the one given by the label. void TimingLogger::NewSplit(const char* new_split_label) { - DCHECK(current_split_ != NULL); - uint64_t current_time = NanoTime(); - uint64_t split_time = current_time - current_split_start_ns_; - ATRACE_END(); - splits_.push_back(std::pair<uint64_t, const char*>(split_time, current_split_)); - if (verbose_) { - LOG(INFO) << "End: " << current_split_ << " " << PrettyDuration(split_time) << "\n" - << "Begin: " << new_split_label; - } - current_split_ = new_split_label; - ATRACE_BEGIN(current_split_); - current_split_start_ns_ = current_time; -} + CHECK(current_split_ != NULL) << "Inserting a new split (" << new_split_label + << ") into a non-existent split."; + DCHECK(new_split_label != NULL) << "New split (" << new_split_label << ") with null label."; -void TimingLogger::EndSplit() { - DCHECK(current_split_ != NULL); - uint64_t current_time = NanoTime(); - uint64_t split_time = current_time - current_split_start_ns_; - ATRACE_END(); - if (verbose_) { - LOG(INFO) << "End: " << current_split_ << " " << PrettyDuration(split_time); - } - splits_.push_back(std::pair<uint64_t, const char*>(split_time, current_split_)); + current_split_->TailInsertSplit(new_split_label); } uint64_t TimingLogger::GetTotalNs() const { uint64_t total_ns = 0; - typedef std::vector<std::pair<uint64_t, const char*> >::const_iterator It; - for (It it = splits_.begin(), end = splits_.end(); it != end; ++it) { - std::pair<uint64_t, const char*> split = *it; + for (base::TimingLogger::SplitsIterator it = splits_.begin(), end = splits_.end(); + it != end; ++it) { + base::TimingLogger::SplitTiming split = *it; total_ns += split.first; } return total_ns; @@ -199,9 +166,9 @@ uint64_t TimingLogger::GetTotalNs() const { void TimingLogger::Dump(std::ostream &os) const { uint64_t longest_split = 0; uint64_t total_ns = 0; - typedef std::vector<std::pair<uint64_t, const char*> >::const_iterator It; - for (It it = splits_.begin(), end = splits_.end(); it != end; ++it) { - std::pair<uint64_t, const char*> split = *it; + for (base::TimingLogger::SplitsIterator it = splits_.begin(), end = splits_.end(); + it != end; ++it) { + base::TimingLogger::SplitTiming split = *it; uint64_t split_time = split.first; longest_split = 
std::max(longest_split, split_time); total_ns += split_time; @@ -210,8 +177,9 @@ void TimingLogger::Dump(std::ostream &os) const { TimeUnit tu = GetAppropriateTimeUnit(longest_split); uint64_t divisor = GetNsToTimeUnitDivisor(tu); // Print formatted splits. - for (It it = splits_.begin(), end = splits_.end(); it != end; ++it) { - std::pair<uint64_t, const char*> split = *it; + for (base::TimingLogger::SplitsIterator it = splits_.begin(), end = splits_.end(); + it != end; ++it) { + base::TimingLogger::SplitTiming split = *it; uint64_t split_time = split.first; if (!precise_ && divisor >= 1000) { // Make the fractional part 0. @@ -223,5 +191,102 @@ void TimingLogger::Dump(std::ostream &os) const { os << name_ << ": end, " << NsToMs(total_ns) << " ms\n"; } + +TimingLogger::ScopedSplit::ScopedSplit(const char* label, TimingLogger* timing_logger) { + DCHECK(label != NULL) << "New scoped split (" << label << ") with null label."; + CHECK(timing_logger != NULL) << "New scoped split (" << label << ") without TimingLogger."; + timing_logger_ = timing_logger; + label_ = label; + running_ns_ = 0; + explicit_ = false; + + // Stash away the current split and pause it. + enclosing_split_ = timing_logger->current_split_; + if (enclosing_split_ != NULL) { + enclosing_split_->Pause(); + } + + timing_logger_->current_split_ = this; + + ATRACE_BEGIN(label_); + + start_ns_ = NanoTime(); + if (timing_logger_->verbose_) { + LOG(INFO) << "Begin: " << label_; + } +} + +TimingLogger::ScopedSplit::~ScopedSplit() { + uint64_t current_time = NanoTime(); + uint64_t split_time = current_time - start_ns_; + running_ns_ += split_time; + ATRACE_END(); + + if (timing_logger_->verbose_) { + LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time); + } + + // If one or more enclosed explcitly started splits are not terminated we can + // either fail or "unwind" the stack of splits in the timing logger to 'this' + // (by deleting the intervening scoped splits). This implements the latter. + TimingLogger::ScopedSplit* current = timing_logger_->current_split_; + while ((current != NULL) && (current != this)) { + delete current; + current = timing_logger_->current_split_; + } + + CHECK(current != NULL) << "Missing scoped split (" << this->label_ + << ") in timing logger (" << timing_logger_->name_ << ")."; + CHECK(timing_logger_->current_split_ == this); + + timing_logger_->splits_.push_back(SplitTiming(running_ns_, label_)); + + timing_logger_->current_split_ = enclosing_split_; + if (enclosing_split_ != NULL) { + enclosing_split_->UnPause(); + } +} + + +void TimingLogger::ScopedSplit::TailInsertSplit(const char* label) { + // Sleight of hand here: Rather than embedding a new scoped split, we're updating the current + // scoped split in place. Basically, it's one way to make explicit and scoped splits compose + // well while maintaining the current semantics of NewSplit. An alternative is to push a new split + // since we unwind the stack of scoped splits in the scoped split destructor. However, this implies + // that the current split is not ended by NewSplit (which calls TailInsertSplit), which would + // be different from what we had before. 
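// To make the composition concrete (this mirrors the ScopedAndExplicit test added below): with an
// explicit "Outer" split open, constructing a ScopedSplit "Inner" pauses "Outer"; an explicit
// StartSplit("A") followed by NewSplit("B") inside that scope records "A" and relabels the explicit
// split to "B" in place; destroying "Inner" unwinds the split stack, recording "B" and then "Inner";
// the final EndSplit records "Outer". Recorded order: A, B, Inner, Outer.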
+ + uint64_t current_time = NanoTime(); + uint64_t split_time = current_time - start_ns_; + ATRACE_END(); + timing_logger_->splits_.push_back(std::pair<uint64_t, const char*>(split_time, label_)); + + if (timing_logger_->verbose_) { + LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time) << "\n" + << "Begin: " << label; + } + + label_ = label; + start_ns_ = current_time; + running_ns_ = 0; + + ATRACE_BEGIN(label); +} + +void TimingLogger::ScopedSplit::Pause() { + uint64_t current_time = NanoTime(); + uint64_t split_time = current_time - start_ns_; + running_ns_ += split_time; + ATRACE_END(); +} + + +void TimingLogger::ScopedSplit::UnPause() { + uint64_t current_time = NanoTime(); + + start_ns_ = current_time; + ATRACE_BEGIN(label_); +} + } // namespace base } // namespace art diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h index 0998837517..8649a960a2 100644 --- a/runtime/base/timing_logger.h +++ b/runtime/base/timing_logger.h @@ -23,6 +23,7 @@ #include <string> #include <vector> +#include <map> namespace art { @@ -32,6 +33,9 @@ namespace base { class CumulativeLogger { public: + typedef std::map<std::string, Histogram<uint64_t> *> Histograms; + typedef std::map<std::string, Histogram<uint64_t> *>::const_iterator HistogramsIterator; + explicit CumulativeLogger(const std::string& name); void prepare_stats(); ~CumulativeLogger(); @@ -51,11 +55,10 @@ class CumulativeLogger { void DumpHistogram(std::ostream &os) EXCLUSIVE_LOCKS_REQUIRED(lock_); uint64_t GetTotalTime() const; static const uint64_t kAdjust = 1000; - std::vector<Histogram<uint64_t> *> histograms_ GUARDED_BY(lock_); + Histograms histograms_ GUARDED_BY(lock_); std::string name_; const std::string lock_name_; mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - size_t index_ GUARDED_BY(lock_); size_t iterations_ GUARDED_BY(lock_); DISALLOW_COPY_AND_ASSIGN(CumulativeLogger); @@ -63,16 +66,22 @@ class CumulativeLogger { namespace base { -// A replacement to timing logger that know when a split starts for the purposes of logging. + +// A timing logger that knows when a split starts for the purposes of logging tools, like systrace. class TimingLogger { public: + // Splits are nanosecond times and split names. + typedef std::pair<uint64_t, const char*> SplitTiming; + typedef std::vector<SplitTiming> SplitTimings; + typedef std::vector<SplitTiming>::const_iterator SplitsIterator; + explicit TimingLogger(const char* name, bool precise, bool verbose); // Clears current splits and labels. void Reset(); - // Starts a split, a split shouldn't be in progress. - void StartSplit(const char* new_split_label); + // Starts a split + void StartSplit(const char* new_split_label); // Ends the current split and starts the one given by the label. void NewSplit(const char* new_split_label); @@ -84,10 +93,53 @@ class TimingLogger { void Dump(std::ostream& os) const; - const std::vector<std::pair<uint64_t, const char*> >& GetSplits() const { + // Scoped timing splits that can be nested and composed with the explicit split + // starts and ends. + class ScopedSplit { + public: + explicit ScopedSplit(const char* label, TimingLogger* timing_logger); + + ~ScopedSplit(); + + friend class TimingLogger; + + private: + // Pauses timing of the split, usually due to nesting of another split. + void Pause(); + + // Unpauses timing of the split, usually because a nested split has ended. + void UnPause(); + + // Used by new split to swap splits in place in a ScopedSplit instance. 
+ void TailInsertSplit(const char* label); + + // The scoped split immediately enclosing this split. Essentially, we get a + // stack of nested splits through this field. + ScopedSplit* enclosing_split_; + + // Was this created via TimingLogger's StartSplit? + bool explicit_; + + // The split's name. + const char* label_; + + // The current split's latest start time. (It may have been paused and restarted.) + uint64_t start_ns_; + + // The running time, outside of pauses. + uint64_t running_ns_; + + // The timing logger holding this split. + TimingLogger* timing_logger_; + + DISALLOW_COPY_AND_ASSIGN(ScopedSplit); + }; + + const SplitTimings& GetSplits() const { return splits_; } + friend class ScopedSplit; protected: // The name of the timing logger. const char* name_; @@ -99,14 +151,11 @@ class TimingLogger { // Verbose logging. const bool verbose_; - // The name of the current split. - const char* current_split_; + // The current scoped split is also the 'top' of the stack of splits in progress. + ScopedSplit* current_split_; - // The nanosecond time the current split started on. - uint64_t current_split_start_ns_; - - // Splits are nanosecond times and split names. - std::vector<std::pair<uint64_t, const char*> > splits_; + // Splits that have ended. + SplitTimings splits_; private: DISALLOW_COPY_AND_ASSIGN(TimingLogger); diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc new file mode 100644 index 0000000000..8f28e4809b --- /dev/null +++ b/runtime/base/timing_logger_test.cc @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "timing_logger.h" + +#include "common_test.h" + +namespace art { + +class TimingLoggerTest : public CommonTest {}; + +// TODO: Negative test cases (improper pairing of EndSplit, etc.) + +TEST_F(TimingLoggerTest, StartEnd) { + const char* split1name = "First Split"; + base::TimingLogger timings("StartEnd", true, false); + + timings.StartSplit(split1name); + + timings.EndSplit(); // Ends split1. + + const base::TimingLogger::SplitTimings& splits = timings.GetSplits(); + + EXPECT_EQ(1U, splits.size()); + EXPECT_STREQ(splits[0].second, split1name); +} + + +TEST_F(TimingLoggerTest, StartNewEnd) { + const char* split1name = "First Split"; + const char* split2name = "Second Split"; + const char* split3name = "Third Split"; + base::TimingLogger timings("StartNewEnd", true, false); + + timings.StartSplit(split1name); + + timings.NewSplit(split2name); // Ends split1. + + timings.NewSplit(split3name); // Ends split2. + + timings.EndSplit(); // Ends split3. 
+ + const base::TimingLogger::SplitTimings& splits = timings.GetSplits(); + + EXPECT_EQ(3U, splits.size()); + EXPECT_STREQ(splits[0].second, split1name); + EXPECT_STREQ(splits[1].second, split2name); + EXPECT_STREQ(splits[2].second, split3name); +} + +TEST_F(TimingLoggerTest, StartNewEndNested) { + const char* split1name = "First Split"; + const char* split2name = "Second Split"; + const char* split3name = "Third Split"; + const char* split4name = "Fourth Split"; + const char* split5name = "Fifth Split"; + base::TimingLogger timings("StartNewEndNested", true, false); + + timings.StartSplit(split1name); + + timings.NewSplit(split2name); // Ends split1. + + timings.StartSplit(split3name); + + timings.StartSplit(split4name); + + timings.NewSplit(split5name); // Ends split4. + + timings.EndSplit(); // Ends split5. + + timings.EndSplit(); // Ends split3. + + timings.EndSplit(); // Ends split2. + + const base::TimingLogger::SplitTimings& splits = timings.GetSplits(); + + EXPECT_EQ(5U, splits.size()); + EXPECT_STREQ(splits[0].second, split1name); + EXPECT_STREQ(splits[1].second, split4name); + EXPECT_STREQ(splits[2].second, split5name); + EXPECT_STREQ(splits[3].second, split3name); + EXPECT_STREQ(splits[4].second, split2name); +} + + +TEST_F(TimingLoggerTest, Scoped) { + const char* outersplit = "Outer Split"; + const char* innersplit1 = "Inner Split 1"; + const char* innerinnersplit1 = "Inner Inner Split 1"; + const char* innersplit2 = "Inner Split 2"; + base::TimingLogger timings("Scoped", true, false); + + { + base::TimingLogger::ScopedSplit outer(outersplit, &timings); + + { + base::TimingLogger::ScopedSplit inner1(innersplit1, &timings); + + { + base::TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings); + } // Ends innerinnersplit1. + } // Ends innersplit1. + + { + base::TimingLogger::ScopedSplit inner2(innersplit2, &timings); + } // Ends innersplit2. + } // Ends outersplit. + + const base::TimingLogger::SplitTimings& splits = timings.GetSplits(); + + EXPECT_EQ(4U, splits.size()); + EXPECT_STREQ(splits[0].second, innerinnersplit1); + EXPECT_STREQ(splits[1].second, innersplit1); + EXPECT_STREQ(splits[2].second, innersplit2); + EXPECT_STREQ(splits[3].second, outersplit); +} + + +TEST_F(TimingLoggerTest, ScopedAndExplicit) { + const char* outersplit = "Outer Split"; + const char* innersplit = "Inner Split"; + const char* innerinnersplit1 = "Inner Inner Split 1"; + const char* innerinnersplit2 = "Inner Inner Split 2"; + base::TimingLogger timings("Scoped", true, false); + + timings.StartSplit(outersplit); + + { + base::TimingLogger::ScopedSplit inner(innersplit, &timings); + + timings.StartSplit(innerinnersplit1); + + timings.NewSplit(innerinnersplit2); // Ends innerinnersplit1. + } // Ends innerinnersplit2, then innersplit. + + timings.EndSplit(); // Ends outersplit. 
+ + const base::TimingLogger::SplitTimings& splits = timings.GetSplits(); + + EXPECT_EQ(4U, splits.size()); + EXPECT_STREQ(splits[0].second, innerinnersplit1); + EXPECT_STREQ(splits[1].second, innerinnersplit2); + EXPECT_STREQ(splits[2].second, innersplit); + EXPECT_STREQ(splits[3].second, outersplit); +} + +} // namespace art diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 060c26c3a7..67be2ff028 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -617,9 +617,7 @@ class ClassLinker { const void* portable_resolution_trampoline_; const void* quick_resolution_trampoline_; - friend class CommonTest; friend class ImageWriter; // for GetClassRoots - friend class ObjectTest; FRIEND_TEST(ClassLinkerTest, ClassRootDescriptors); FRIEND_TEST(mirror::DexCacheTest, Open); FRIEND_TEST(ExceptionTest, FindExceptionHandler); diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index cbdc430b8c..aaff0fc6da 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -313,7 +313,6 @@ void DexFile::InitMembers() { method_ids_ = reinterpret_cast<const MethodId*>(b + h->method_ids_off_); proto_ids_ = reinterpret_cast<const ProtoId*>(b + h->proto_ids_off_); class_defs_ = reinterpret_cast<const ClassDef*>(b + h->class_defs_off_); - DCHECK_EQ(size_, header_->file_size_) << GetLocation(); } bool DexFile::CheckMagicAndVersion() const { diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 76947e5cee..a60a1398d8 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -209,7 +209,7 @@ class DexFile { } const TypeItem& GetTypeItem(uint32_t idx) const { - CHECK_LT(idx, this->size_); + DCHECK_LT(idx, this->size_); return this->list_[idx]; } @@ -494,7 +494,7 @@ class DexFile { // Returns the FieldId at the specified index. const FieldId& GetFieldId(uint32_t idx) const { - CHECK_LT(idx, NumFieldIds()) << GetLocation(); + DCHECK_LT(idx, NumFieldIds()) << GetLocation(); return field_ids_[idx]; } @@ -585,7 +585,7 @@ class DexFile { // Returns the ClassDef at the specified index. 
const ClassDef& GetClassDef(uint32_t idx) const { - CHECK_LT(idx, NumClassDefs()) << GetLocation(); + DCHECK_LT(idx, NumClassDefs()) << GetLocation(); return class_defs_[idx]; } @@ -1025,7 +1025,7 @@ class ClassDataItemIterator { if (pos_ < EndOfInstanceFieldsPos()) { return last_idx_ + field_.field_idx_delta_; } else { - CHECK_LT(pos_, EndOfVirtualMethodsPos()); + DCHECK_LT(pos_, EndOfVirtualMethodsPos()); return last_idx_ + method_.method_idx_delta_; } } @@ -1033,7 +1033,7 @@ class ClassDataItemIterator { if (pos_ < EndOfInstanceFieldsPos()) { return field_.access_flags_; } else { - CHECK_LT(pos_, EndOfVirtualMethodsPos()); + DCHECK_LT(pos_, EndOfVirtualMethodsPos()); return method_.access_flags_; } } @@ -1045,7 +1045,7 @@ class ClassDataItemIterator { return kDirect; } } else { - CHECK_EQ(GetMemberAccessFlags() & kAccStatic, 0U); + DCHECK_EQ(GetMemberAccessFlags() & kAccStatic, 0U); if ((class_def.access_flags_ & kAccInterface) != 0) { return kInterface; } else if ((GetMemberAccessFlags() & kAccConstructor) != 0) { diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h index f8f2773582..fb0f61b3b2 100644 --- a/runtime/gc/accounting/card_table-inl.h +++ b/runtime/gc/accounting/card_table-inl.h @@ -41,10 +41,9 @@ static inline bool byte_cas(byte old_value, byte new_value, byte* address) { return success; } -template <typename Visitor, typename FingerVisitor> +template <typename Visitor> inline void CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, - const Visitor& visitor, const FingerVisitor& finger_visitor, - const byte minimum_age) const { + const Visitor& visitor, const byte minimum_age) const { DCHECK(bitmap->HasAddress(scan_begin)); DCHECK(bitmap->HasAddress(scan_end - 1)); // scan_end is the byte after the last byte we scan. byte* card_cur = CardFromAddr(scan_begin); @@ -57,7 +56,7 @@ inline void CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_en if (*card_cur >= minimum_age) { uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur)); uintptr_t end = start + kCardSize; - bitmap->VisitMarkedRange(start, end, visitor, finger_visitor); + bitmap->VisitMarkedRange(start, end, visitor); } ++card_cur; } @@ -87,7 +86,7 @@ inline void CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_en << "card " << static_cast<size_t>(card_byte) << " word " << (start_word & 0xFF); uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card)); uintptr_t end = start + kCardSize; - bitmap->VisitMarkedRange(start, end, visitor, finger_visitor); + bitmap->VisitMarkedRange(start, end, visitor); } start_word >>= 8; } @@ -100,7 +99,7 @@ inline void CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_en if (*card_cur >= minimum_age) { uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur)); uintptr_t end = start + kCardSize; - bitmap->VisitMarkedRange(start, end, visitor, finger_visitor); + bitmap->VisitMarkedRange(start, end, visitor); } ++card_cur; } diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index 1acaf5bdfc..f03062607d 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -101,9 +101,8 @@ class CardTable { // For every dirty at least minumum age between begin and end invoke the visitor with the // specified argument. 
- template <typename Visitor, typename FingerVisitor> - void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, - const Visitor& visitor, const FingerVisitor& finger_visitor, + template <typename Visitor> + void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor, const byte minimum_age = kCardDirty) const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/gc/accounting/heap_bitmap-inl.h b/runtime/gc/accounting/heap_bitmap-inl.h index f6cf2b53ea..0524ccb69c 100644 --- a/runtime/gc/accounting/heap_bitmap-inl.h +++ b/runtime/gc/accounting/heap_bitmap-inl.h @@ -30,7 +30,7 @@ inline void HeapBitmap::Visit(const Visitor& visitor) { for (It it = continuous_space_bitmaps_.begin(), end = continuous_space_bitmaps_.end(); it != end; ++it) { SpaceBitmap* bitmap = *it; - bitmap->VisitMarkedRange(bitmap->HeapBegin(), bitmap->HeapLimit(), visitor, VoidFunctor()); + bitmap->VisitMarkedRange(bitmap->HeapBegin(), bitmap->HeapLimit(), visitor); } // TODO: C++0x auto typedef SpaceSetMapVector::iterator It2; diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index 718dcf0369..0363acb477 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -261,7 +261,7 @@ void ModUnionTableReferenceCache::Verify() { space::ContinuousSpace* space = heap->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false); SpaceBitmap* live_bitmap = space->GetLiveBitmap(); - live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor()); + live_bitmap->VisitMarkedRange(start, end, visitor); } } } @@ -307,12 +307,11 @@ void ModUnionTableReferenceCache::Update() { uintptr_t end = start + CardTable::kCardSize; SpaceBitmap* live_bitmap = heap->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false)->GetLiveBitmap(); - live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor()); + live_bitmap->VisitMarkedRange(start, end, visitor); // Update the corresponding references for the card. // TODO: C++0x auto - SafeMap<const byte*, std::vector<const Object*> >::iterator - found = references_.find(card); + SafeMap<const byte*, std::vector<const Object*> >::iterator found = references_.find(card); if (found == references_.end()) { if (cards_references.empty()) { // No reason to add empty array. 
@@ -364,7 +363,7 @@ void ModUnionTableCardCache::MarkReferences(collector::MarkSweep* mark_sweep) { space::ContinuousSpace* cur_space = heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false); accounting::SpaceBitmap* cur_live_bitmap = cur_space->GetLiveBitmap(); - cur_live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor()); + cur_live_bitmap->VisitMarkedRange(start, end, visitor); for (++it; it != cc_end; ++it) { card = *it; start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card)); @@ -373,7 +372,7 @@ void ModUnionTableCardCache::MarkReferences(collector::MarkSweep* mark_sweep) { cur_space = heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false); cur_live_bitmap = cur_space->GetLiveBitmap(); } - cur_live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor()); + cur_live_bitmap->VisitMarkedRange(start, end, visitor); } } } diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index a4cfe3c567..1dde18da4a 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -53,13 +53,10 @@ inline bool SpaceBitmap::Test(const mirror::Object* obj) const { return (bitmap_begin_[OffsetToIndex(offset)] & OffsetToMask(offset)) != 0; } -template <typename Visitor, typename FingerVisitor> +template <typename Visitor> void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, - const Visitor& visitor, - const FingerVisitor& finger_visitor) const { + const Visitor& visitor) const { DCHECK_LT(visit_begin, visit_end); - - const size_t word_span = kAlignment * kBitsPerWord; // Equals IndexToOffset(1). const size_t bit_index_start = (visit_begin - heap_begin_) / kAlignment; const size_t bit_index_end = (visit_end - heap_begin_ - 1) / kAlignment; @@ -79,7 +76,6 @@ void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, // If word_start == word_end then handle this case at the same place we handle the right edge. if (edge_word != 0 && word_start < word_end) { uintptr_t ptr_base = IndexToOffset(word_start) + heap_begin_; - finger_visitor(reinterpret_cast<void*>(ptr_base + word_span)); do { const size_t shift = CLZ(edge_word); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); @@ -93,7 +89,6 @@ void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, size_t w = bitmap_begin_[i]; if (w != 0) { uintptr_t ptr_base = IndexToOffset(i) + heap_begin_; - finger_visitor(reinterpret_cast<void*>(ptr_base + word_span)); do { const size_t shift = CLZ(w); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); @@ -114,7 +109,6 @@ void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, // Bits that we trim off the right. 
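The word-scanning loops in VisitMarkedRange above share one trick: CLZ of the remaining word gives the index of its highest set bit, that index is turned back into an object address for the visitor, and the bit is cleared before the next iteration. Below is a standalone sketch of that inner loop, assuming a GCC/Clang __builtin_clzl and the 8-byte object alignment SpaceBitmap uses; the names are illustrative, not ART's.

    #include <cstdint>

    static const uintptr_t kObjectAlignment = 8;  // Assumed alignment, as in SpaceBitmap::kAlignment.

    // Visits every object whose bit is set in one bitmap word. 'ptr_base' is the heap address
    // that corresponds to the word's most significant bit.
    template <typename Visitor>
    void VisitWord(uintptr_t ptr_base, unsigned long word, const Visitor& visitor) {
      const unsigned long kHighBit = 1UL << (8 * sizeof(unsigned long) - 1);
      while (word != 0) {
        const int shift = __builtin_clzl(word);  // Leading zeros == bit index counted from the MSB.
        visitor(reinterpret_cast<void*>(ptr_base + shift * kObjectAlignment));
        word ^= kHighBit >> shift;               // Clear the bit we just visited.
      }
    }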
edge_word &= ~((static_cast<size_t>(kWordHighBitMask) >> right_bits) - 1); uintptr_t ptr_base = IndexToOffset(word_end) + heap_begin_; - finger_visitor(reinterpret_cast<void*>(ptr_base + word_span)); while (edge_word != 0) { const size_t shift = CLZ(edge_word); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 674c2621a0..26ab1ded96 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -118,9 +118,8 @@ class SpaceBitmap { } } - template <typename Visitor, typename FingerVisitor> - void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, - const Visitor& visitor, const FingerVisitor& finger_visitor) const + template <typename Visitor> + void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, const Visitor& visitor) const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -130,10 +129,8 @@ class SpaceBitmap { void InOrderWalk(Callback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); - static void SweepWalk(const SpaceBitmap& live, - const SpaceBitmap& mark, - uintptr_t base, uintptr_t max, - SweepCallback* thunk, void* arg); + static void SweepWalk(const SpaceBitmap& live, const SpaceBitmap& mark, uintptr_t base, + uintptr_t max, SweepCallback* thunk, void* arg); void CopyFrom(SpaceBitmap* source_bitmap); @@ -179,7 +176,8 @@ class SpaceBitmap { private: // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1, // however, we document that this is expected on heap_end_ - SpaceBitmap(const std::string& name, MemMap* mem_map, word* bitmap_begin, size_t bitmap_size, const void* heap_begin) + SpaceBitmap(const std::string& name, MemMap* mem_map, word* bitmap_begin, size_t bitmap_size, + const void* heap_begin) : mem_map_(mem_map), bitmap_begin_(bitmap_begin), bitmap_size_(bitmap_size), heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)), name_(name) {} diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 89c768a34e..c1ca55a5b7 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -71,18 +71,6 @@ static const bool kMeasureOverhead = false; static const bool kCountTasks = false; static const bool kCountJavaLangRefs = false; -class SetFingerVisitor { - public: - explicit SetFingerVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {} - - void operator()(void* finger) const { - mark_sweep_->SetFinger(reinterpret_cast<Object*>(finger)); - } - - private: - MarkSweep* const mark_sweep_; -}; - void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) { // Bind live to mark bitmap if necessary. 
if (space->GetLiveBitmap() != space->GetMarkBitmap()) { @@ -119,6 +107,7 @@ void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) { } void MarkSweep::BindBitmaps() { + timings_.StartSplit("BindBitmaps"); const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); @@ -130,6 +119,7 @@ void MarkSweep::BindBitmaps() { ImmuneSpace(space); } } + timings_.EndSplit(); } MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix) @@ -139,7 +129,6 @@ MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_pre current_mark_bitmap_(NULL), java_lang_Class_(NULL), mark_stack_(NULL), - finger_(NULL), immune_begin_(NULL), immune_end_(NULL), soft_reference_list_(NULL), @@ -159,7 +148,6 @@ void MarkSweep::InitializePhase() { timings_.StartSplit("InitializePhase"); mark_stack_ = GetHeap()->mark_stack_.get(); DCHECK(mark_stack_ != NULL); - finger_ = NULL; SetImmuneRange(NULL, NULL); soft_reference_list_ = NULL; weak_reference_list_ = NULL; @@ -180,13 +168,17 @@ void MarkSweep::InitializePhase() { reference_count_ = 0; java_lang_Class_ = Class::GetJavaLangClass(); CHECK(java_lang_Class_ != NULL); + timings_.EndSplit(); + FindDefaultMarkBitmap(); - // Do any pre GC verification. + +// Do any pre GC verification. + timings_.StartSplit("PreGcVerification"); heap_->PreGcVerification(this); + timings_.EndSplit(); } void MarkSweep::ProcessReferences(Thread* self) { - timings_.NewSplit("ProcessReferences"); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); ProcessReferences(&soft_reference_list_, clear_soft_references_, &weak_reference_list_, &finalizer_reference_list_, &phantom_reference_list_); @@ -198,7 +190,6 @@ bool MarkSweep::HandleDirtyObjectsPhase() { Locks::mutator_lock_->AssertExclusiveHeld(self); { - timings_.NewSplit("ReMarkRoots"); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); // Re-mark root set. @@ -216,16 +207,6 @@ bool MarkSweep::HandleDirtyObjectsPhase() { // This second sweep makes sure that we don't have any objects in the live stack which point to // freed objects. These cause problems since their references may be previously freed objects. SweepArray(allocation_stack, false); - } else { - timings_.NewSplit("UnMarkAllocStack"); - WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - // The allocation stack contains things allocated since the start of the GC. These may have been - // marked during this GC meaning they won't be eligible for reclaiming in the next sticky GC. - // Remove these objects from the mark bitmaps so that they will be eligible for sticky - // collection. - heap_->UnMarkAllocStack(GetHeap()->alloc_space_->GetMarkBitmap(), - GetHeap()->large_object_space_->GetMarkObjects(), - allocation_stack); } return true; } @@ -238,29 +219,26 @@ void MarkSweep::MarkingPhase() { Heap* heap = GetHeap(); Thread* self = Thread::Current(); - timings_.NewSplit("BindBitmaps"); BindBitmaps(); FindDefaultMarkBitmap(); + // Process dirty cards and add dirty cards to mod union tables. heap->ProcessCards(timings_); // Need to do this before the checkpoint since we don't want any threads to add references to // the live stack during the recursive mark. - timings_.NewSplit("SwapStacks"); + timings_.StartSplit("SwapStacks"); heap->SwapStacks(); + timings_.EndSplit(); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); if (Locks::mutator_lock_->IsExclusiveHeld(self)) { // If we exclusively hold the mutator lock, all threads must be suspended. 
- timings_.NewSplit("MarkRoots"); MarkRoots(); } else { - timings_.NewSplit("MarkRootsCheckpoint"); MarkRootsCheckpoint(self); - timings_.NewSplit("MarkNonThreadRoots"); MarkNonThreadRoots(); } - timings_.NewSplit("MarkConcurrentRoots"); MarkConcurrentRoots(); heap->UpdateAndMarkModUnion(this, timings_, GetGcType()); @@ -270,22 +248,43 @@ void MarkSweep::MarkingPhase() { void MarkSweep::MarkReachableObjects() { // Mark everything allocated since the last as GC live so that we can sweep concurrently, // knowing that new allocations won't be marked as live. - timings_.NewSplit("MarkStackAsLive"); + timings_.StartSplit("MarkStackAsLive"); accounting::ObjectStack* live_stack = heap_->GetLiveStack(); heap_->MarkAllocStack(heap_->alloc_space_->GetLiveBitmap(), heap_->large_object_space_->GetLiveObjects(), live_stack); live_stack->Reset(); + timings_.EndSplit(); // Recursively mark all the non-image bits set in the mark bitmap. RecursiveMark(); - DisableFinger(); } void MarkSweep::ReclaimPhase() { Thread* self = Thread::Current(); if (!IsConcurrent()) { + base::TimingLogger::ScopedSplit split("ProcessReferences", &timings_); ProcessReferences(self); + } else { + base::TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_); + accounting::ObjectStack* allocation_stack = GetHeap()->allocation_stack_.get(); + WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + // The allocation stack contains things allocated since the start of the GC. These may have been + // marked during this GC meaning they won't be eligible for reclaiming in the next sticky GC. + // Remove these objects from the mark bitmaps so that they will be eligible for sticky + // collection. + // There is a race here which is safely handled. Another thread such as the hprof could + // have flushed the alloc stack after we resumed the threads. This is safe however, since + // reseting the allocation stack zeros it out with madvise. This means that we will either + // read NULLs or attempt to unmark a newly allocated object which will not be marked in the + // first place. + mirror::Object** end = allocation_stack->End(); + for (mirror::Object** it = allocation_stack->Begin(); it != end; ++it) { + Object* obj = *it; + if (obj != NULL) { + UnMarkObjectNonNull(obj); + } + } } // Before freeing anything, lets verify the heap. @@ -293,7 +292,9 @@ void MarkSweep::ReclaimPhase() { ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); VerifyImageRoots(); } + timings_.StartSplit("PreSweepingGcVerification"); heap_->PreSweepingGcVerification(this); + timings_.EndSplit(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); @@ -304,8 +305,9 @@ void MarkSweep::ReclaimPhase() { // Swap the live and mark bitmaps for each space which we modified space. This is an // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound // bitmaps. - timings_.NewSplit("SwapBitmaps"); + timings_.StartSplit("SwapBitmaps"); SwapBitmaps(); + timings_.EndSplit(); // Unbind the live and mark bitmaps. 
UnBindBitmaps(); @@ -318,6 +320,7 @@ void MarkSweep::SetImmuneRange(Object* begin, Object* end) { } void MarkSweep::FindDefaultMarkBitmap() { + timings_.StartSplit("FindDefaultMarkBitmap"); const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); // TODO: C++0x typedef std::vector<space::ContinuousSpace*>::const_iterator It; @@ -326,6 +329,7 @@ void MarkSweep::FindDefaultMarkBitmap() { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) { current_mark_bitmap_ = (*it)->GetMarkBitmap(); CHECK(current_mark_bitmap_ != NULL); + timings_.EndSplit(); return; } } @@ -348,22 +352,39 @@ void MarkSweep::ExpandMarkStack() { } } -inline void MarkSweep::MarkObjectNonNullParallel(const Object* obj, bool check_finger) { +inline void MarkSweep::MarkObjectNonNullParallel(const Object* obj) { DCHECK(obj != NULL); if (MarkObjectParallel(obj)) { - if (kDisableFinger || (check_finger && obj < finger_)) { - while (UNLIKELY(!mark_stack_->AtomicPushBack(const_cast<Object*>(obj)))) { - // Only reason a push can fail is that the mark stack is full. - ExpandMarkStack(); - } + while (UNLIKELY(!mark_stack_->AtomicPushBack(const_cast<Object*>(obj)))) { + // Only reason a push can fail is that the mark stack is full. + ExpandMarkStack(); + } + } +} + +inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) { + DCHECK(!IsImmune(obj)); + // Try to take advantage of locality of references within a space, failing this find the space + // the hard way. + accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_; + if (UNLIKELY(!object_bitmap->HasAddress(obj))) { + accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj); + if (LIKELY(new_bitmap != NULL)) { + object_bitmap = new_bitmap; + } else { + MarkLargeObject(obj, false); + return; } } + + DCHECK(object_bitmap->HasAddress(obj)); + object_bitmap->Clear(obj); } -inline void MarkSweep::MarkObjectNonNull(const Object* obj, bool check_finger) { +inline void MarkSweep::MarkObjectNonNull(const Object* obj) { DCHECK(obj != NULL); - if (obj >= immune_begin_ && obj < immune_end_) { + if (IsImmune(obj)) { DCHECK(IsMarked(obj)); return; } @@ -376,7 +397,7 @@ inline void MarkSweep::MarkObjectNonNull(const Object* obj, bool check_finger) { if (LIKELY(new_bitmap != NULL)) { object_bitmap = new_bitmap; } else { - MarkLargeObject(obj); + MarkLargeObject(obj, true); return; } } @@ -384,19 +405,17 @@ inline void MarkSweep::MarkObjectNonNull(const Object* obj, bool check_finger) { // This object was not previously marked. if (!object_bitmap->Test(obj)) { object_bitmap->Set(obj); - if (kDisableFinger || (check_finger && obj < finger_)) { - // Do we need to expand the mark stack? - if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) { - ExpandMarkStack(); - } - // The object must be pushed on to the mark stack. - mark_stack_->PushBack(const_cast<Object*>(obj)); + // Do we need to expand the mark stack? + if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) { + ExpandMarkStack(); } + // The object must be pushed on to the mark stack. + mark_stack_->PushBack(const_cast<Object*>(obj)); } } // Rare case, probably not worth inlining since it will increase instruction cache miss rate. -bool MarkSweep::MarkLargeObject(const Object* obj) { +bool MarkSweep::MarkLargeObject(const Object* obj, bool set) { // TODO: support >1 discontinuous space. 
space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace(); accounting::SpaceSetMap* large_objects = large_object_space->GetMarkObjects(); @@ -413,8 +432,11 @@ bool MarkSweep::MarkLargeObject(const Object* obj) { if (kProfileLargeObjects) { ++large_object_mark_; } - large_objects->Set(obj); - // Don't need to check finger since large objects never have any object references. + if (set) { + large_objects->Set(obj); + } else { + large_objects->Clear(obj); + } return true; } return false; @@ -423,7 +445,7 @@ bool MarkSweep::MarkLargeObject(const Object* obj) { inline bool MarkSweep::MarkObjectParallel(const Object* obj) { DCHECK(obj != NULL); - if (obj >= immune_begin_ && obj < immune_end_) { + if (IsImmune(obj)) { DCHECK(IsMarked(obj)); return false; } @@ -439,7 +461,7 @@ inline bool MarkSweep::MarkObjectParallel(const Object* obj) { // TODO: Remove the Thread::Current here? // TODO: Convert this to some kind of atomic marking? MutexLock mu(Thread::Current(), large_object_lock_); - return MarkLargeObject(obj); + return MarkLargeObject(obj, true); } } @@ -454,13 +476,13 @@ inline bool MarkSweep::MarkObjectParallel(const Object* obj) { // need to be added to the mark stack. void MarkSweep::MarkObject(const Object* obj) { if (obj != NULL) { - MarkObjectNonNull(obj, true); + MarkObjectNonNull(obj); } } void MarkSweep::MarkRoot(const Object* obj) { if (obj != NULL) { - MarkObjectNonNull(obj, false); + MarkObjectNonNull(obj); } } @@ -468,21 +490,21 @@ void MarkSweep::MarkRootParallelCallback(const Object* root, void* arg) { DCHECK(root != NULL); DCHECK(arg != NULL); MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg); - mark_sweep->MarkObjectNonNullParallel(root, false); + mark_sweep->MarkObjectNonNullParallel(root); } void MarkSweep::MarkObjectCallback(const Object* root, void* arg) { DCHECK(root != NULL); DCHECK(arg != NULL); MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg); - mark_sweep->MarkObjectNonNull(root, false); + mark_sweep->MarkObjectNonNull(root); } void MarkSweep::ReMarkObjectVisitor(const Object* root, void* arg) { DCHECK(root != NULL); DCHECK(arg != NULL); MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg); - mark_sweep->MarkObjectNonNull(root, true); + mark_sweep->MarkObjectNonNull(root); } void MarkSweep::VerifyRootCallback(const Object* root, void* arg, size_t vreg, @@ -509,16 +531,22 @@ void MarkSweep::VerifyRoots() { // Marks all objects in the root set. void MarkSweep::MarkRoots() { + timings_.StartSplit("MarkRoots"); Runtime::Current()->VisitNonConcurrentRoots(MarkObjectCallback, this); + timings_.EndSplit(); } void MarkSweep::MarkNonThreadRoots() { + timings_.StartSplit("MarkNonThreadRoots"); Runtime::Current()->VisitNonThreadRoots(MarkObjectCallback, this); + timings_.EndSplit(); } void MarkSweep::MarkConcurrentRoots() { + timings_.StartSplit("MarkConcurrentRoots"); // Visit all runtime roots and clear dirty flags. 
Runtime::Current()->VisitConcurrentRoots(MarkObjectCallback, this, false, true); + timings_.EndSplit(); } class CheckObjectVisitor { @@ -582,27 +610,27 @@ void MarkSweep::ScanGrayObjects(byte minimum_age) { accounting::CardTable* card_table = GetHeap()->GetCardTable(); const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); ScanObjectVisitor visitor(this); - SetFingerVisitor finger_visitor(this); // TODO: C++0x typedef std::vector<space::ContinuousSpace*>::const_iterator It; for (It it = spaces.begin(), space_end = spaces.end(); it != space_end; ++it) { space::ContinuousSpace* space = *it; switch (space->GetGcRetentionPolicy()) { case space::kGcRetentionPolicyNeverCollect: - timings_.NewSplit("ScanGrayImageSpaceObjects"); + timings_.StartSplit("ScanGrayImageSpaceObjects"); break; case space::kGcRetentionPolicyFullCollect: - timings_.NewSplit("ScanGrayZygoteSpaceObjects"); + timings_.StartSplit("ScanGrayZygoteSpaceObjects"); break; case space::kGcRetentionPolicyAlwaysCollect: - timings_.NewSplit("ScanGrayAllocSpaceObjects"); + timings_.StartSplit("ScanGrayAllocSpaceObjects"); break; } byte* begin = space->Begin(); byte* end = space->End(); // Image spaces are handled properly since live == marked for them. accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap(); - card_table->Scan(mark_bitmap, begin, end, visitor, finger_visitor, minimum_age); + card_table->Scan(mark_bitmap, begin, end, visitor, minimum_age); + timings_.EndSplit(); } } @@ -626,6 +654,7 @@ void MarkSweep::VerifyImageRoots() { // Verify roots ensures that all the references inside the image space point // objects which are either in the image space or marked objects in the alloc // space + timings_.StartSplit("VerifyImageRoots"); CheckBitmapVisitor visitor(this); const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); // TODO: C++0x @@ -637,15 +666,16 @@ void MarkSweep::VerifyImageRoots() { uintptr_t end = reinterpret_cast<uintptr_t>(space->End()); accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap(); DCHECK(live_bitmap != NULL); - live_bitmap->VisitMarkedRange(begin, end, visitor, VoidFunctor()); + live_bitmap->VisitMarkedRange(begin, end, visitor); } } + timings_.EndSplit(); } // Populates the mark stack based on the set of marked objects and // recursively marks until the mark stack is emptied. void MarkSweep::RecursiveMark() { - timings_.NewSplit("RecursiveMark"); + base::TimingLogger::ScopedSplit("RecursiveMark", &timings_); // RecursiveMark will build the lists of known instances of the Reference classes. // See DelayReferenceReferent for details. CHECK(soft_reference_list_ == NULL); @@ -655,10 +685,8 @@ void MarkSweep::RecursiveMark() { CHECK(cleared_reference_list_ == NULL); const bool partial = GetGcType() == kGcTypePartial; - SetFingerVisitor set_finger_visitor(this); ScanObjectVisitor scan_visitor(this); if (!kDisableFinger) { - finger_ = NULL; const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); // TODO: C++0x typedef std::vector<space::ContinuousSpace*>::const_iterator It; @@ -674,12 +702,10 @@ void MarkSweep::RecursiveMark() { // This function does not handle heap end increasing, so we must use the space end. 
uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); uintptr_t end = reinterpret_cast<uintptr_t>(space->End()); - current_mark_bitmap_->VisitMarkedRange(begin, end, scan_visitor, set_finger_visitor); + current_mark_bitmap_->VisitMarkedRange(begin, end, scan_visitor); } } } - DisableFinger(); - timings_.NewSplit("ProcessMarkStack"); ProcessMarkStack(); } @@ -691,12 +717,13 @@ bool MarkSweep::IsMarkedCallback(const Object* object, void* arg) { void MarkSweep::RecursiveMarkDirtyObjects(byte minimum_age) { ScanGrayObjects(minimum_age); - timings_.NewSplit("ProcessMarkStack"); ProcessMarkStack(); } void MarkSweep::ReMarkRoots() { + timings_.StartSplit("ReMarkRoots"); Runtime::Current()->VisitRoots(ReMarkObjectVisitor, this, true, true); + timings_.EndSplit(); } void MarkSweep::SweepJniWeakGlobals(IsMarkedTester is_marked, void* arg) { @@ -735,12 +762,14 @@ void MarkSweep::SweepSystemWeaksArray(accounting::ObjectStack* allocations) { // So compute !(!IsMarked && IsLive) which is equal to (IsMarked || !IsLive). // Or for swapped (IsLive || !IsMarked). + timings_.StartSplit("SweepSystemWeaksArray"); ArrayMarkedCheck visitor; visitor.live_stack = allocations; visitor.mark_sweep = this; runtime->GetInternTable()->SweepInternTableWeaks(IsMarkedArrayCallback, &visitor); runtime->GetMonitorList()->SweepMonitorList(IsMarkedArrayCallback, &visitor); SweepJniWeakGlobals(IsMarkedArrayCallback, &visitor); + timings_.EndSplit(); } void MarkSweep::SweepSystemWeaks() { @@ -750,9 +779,11 @@ void MarkSweep::SweepSystemWeaks() { // !IsMarked && IsLive // So compute !(!IsMarked && IsLive) which is equal to (IsMarked || !IsLive). // Or for swapped (IsLive || !IsMarked). + timings_.StartSplit("SweepSystemWeaks"); runtime->GetInternTable()->SweepInternTableWeaks(IsMarkedCallback, this); runtime->GetMonitorList()->SweepMonitorList(IsMarkedCallback, this); SweepJniWeakGlobals(IsMarkedCallback, this); + timings_.EndSplit(); } bool MarkSweep::VerifyIsLiveCallback(const Object* obj, void* arg) { @@ -817,6 +848,7 @@ class CheckpointMarkThreadRoots : public Closure { void MarkSweep::MarkRootsCheckpoint(Thread* self) { CheckpointMarkThreadRoots check_point(this); + timings_.StartSplit("MarkRootsCheckpoint"); ThreadList* thread_list = Runtime::Current()->GetThreadList(); // Request the check point is run on all threads returning a count of the threads that must // run through the barrier including self. @@ -831,6 +863,7 @@ void MarkSweep::MarkRootsCheckpoint(Thread* self) { self->SetState(kWaitingPerformingGc); Locks::mutator_lock_->SharedLock(self); Locks::heap_bitmap_lock_->ExclusiveLock(self); + timings_.EndSplit(); } void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) { @@ -869,10 +902,9 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma // If we don't swap bitmaps then newly allocated Weaks go into the live bitmap but not mark // bitmap, resulting in occasional frees of Weaks which are still in use. - timings_.NewSplit("SweepSystemWeaks"); SweepSystemWeaksArray(allocations); - timings_.NewSplit("Process allocation stack"); + timings_.StartSplit("Process allocation stack"); // Newly allocated objects MUST be in the alloc space and those are the only objects which we are // going to free. 
accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap(); @@ -906,8 +938,9 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma } } CHECK_EQ(count, allocations->Size()); - timings_.NewSplit("FreeList"); + timings_.EndSplit(); + timings_.StartSplit("FreeList"); size_t freed_objects = out - objects; freed_bytes += space->FreeList(self, freed_objects, objects); VLOG(heap) << "Freed " << freed_objects << "/" << count @@ -915,9 +948,11 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma heap_->RecordFree(freed_objects + freed_large_objects, freed_bytes); freed_objects_.fetch_add(freed_objects); freed_bytes_.fetch_add(freed_bytes); + timings_.EndSplit(); - timings_.NewSplit("ResetStack"); + timings_.StartSplit("ResetStack"); allocations->Reset(); + timings_.EndSplit(); } void MarkSweep::Sweep(bool swap_bitmaps) { @@ -925,7 +960,6 @@ void MarkSweep::Sweep(bool swap_bitmaps) { // If we don't swap bitmaps then newly allocated Weaks go into the live bitmap but not mark // bitmap, resulting in occasional frees of Weaks which are still in use. - timings_.NewSplit("SweepSystemWeaks"); SweepSystemWeaks(); const bool partial = (GetGcType() == kGcTypePartial); @@ -953,22 +987,25 @@ void MarkSweep::Sweep(bool swap_bitmaps) { std::swap(live_bitmap, mark_bitmap); } if (!space->IsZygoteSpace()) { - timings_.NewSplit("SweepAllocSpace"); + timings_.StartSplit("SweepAllocSpace"); // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked. accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end, &SweepCallback, reinterpret_cast<void*>(&scc)); + timings_.EndSplit(); } else { - timings_.NewSplit("SweepZygote"); + timings_.StartSplit("SweepZygote"); // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual // memory. accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end, &ZygoteSweepCallback, reinterpret_cast<void*>(&scc)); + timings_.EndSplit(); } } } - timings_.NewSplit("SweepLargeObjects"); + timings_.StartSplit("SweepLargeObjects"); SweepLargeObjects(swap_bitmaps); + timings_.EndSplit(); } void MarkSweep::SweepLargeObjects(bool swap_bitmaps) { @@ -1260,8 +1297,10 @@ void MarkSweep::ProcessMarkStackParallel() { // Scan anything that's on the mark stack. void MarkSweep::ProcessMarkStack() { ThreadPool* thread_pool = GetHeap()->GetThreadPool(); + timings_.StartSplit("ProcessMarkStack"); if (kParallelMarkStack && thread_pool != NULL && thread_pool->GetThreadCount() > 0) { ProcessMarkStackParallel(); + timings_.EndSplit(); return; } @@ -1303,6 +1342,7 @@ void MarkSweep::ProcessMarkStack() { ScanObject(obj); } } + timings_.EndSplit(); } // Walks the reference list marking any references subject to the @@ -1316,6 +1356,7 @@ void MarkSweep::PreserveSomeSoftReferences(Object** list) { DCHECK(mark_stack_->IsEmpty()); + timings_.StartSplit("PreserveSomeSoftReferences"); while (*list != NULL) { Object* ref = heap_->DequeuePendingReference(list); Object* referent = heap_->GetReferenceReferent(ref); @@ -1335,6 +1376,8 @@ void MarkSweep::PreserveSomeSoftReferences(Object** list) { } } *list = clear; + timings_.EndSplit(); + // Restart the mark with the newly black references added to the // root set. 
ProcessMarkStack(); @@ -1342,7 +1385,7 @@ void MarkSweep::PreserveSomeSoftReferences(Object** list) { inline bool MarkSweep::IsMarked(const Object* object) const SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { - if (object >= immune_begin_ && object < immune_end_) { + if (IsImmune(object)) { return true; } DCHECK(current_mark_bitmap_ != NULL); @@ -1377,6 +1420,7 @@ void MarkSweep::ClearWhiteReferences(Object** list) { // referent field is cleared. void MarkSweep::EnqueueFinalizerReferences(Object** list) { DCHECK(list != NULL); + timings_.StartSplit("EnqueueFinalizerReferences"); MemberOffset zombie_offset = heap_->GetFinalizerReferenceZombieOffset(); bool has_enqueued = false; while (*list != NULL) { @@ -1392,6 +1436,7 @@ void MarkSweep::EnqueueFinalizerReferences(Object** list) { has_enqueued = true; } } + timings_.EndSplit(); if (has_enqueued) { ProcessMarkStack(); } @@ -1414,15 +1459,18 @@ void MarkSweep::ProcessReferences(Object** soft_references, bool clear_soft, PreserveSomeSoftReferences(soft_references); } + timings_.StartSplit("ProcessReferences"); // Clear all remaining soft and weak references with white // referents. ClearWhiteReferences(soft_references); ClearWhiteReferences(weak_references); + timings_.EndSplit(); // Preserve all white objects with finalize methods and schedule // them for finalization. EnqueueFinalizerReferences(finalizer_references); + timings_.StartSplit("ProcessReferences"); // Clear all f-reachable soft and weak references with white // referents. ClearWhiteReferences(soft_references); @@ -1436,9 +1484,11 @@ void MarkSweep::ProcessReferences(Object** soft_references, bool clear_soft, DCHECK(*weak_references == NULL); DCHECK(*finalizer_references == NULL); DCHECK(*phantom_references == NULL); + timings_.EndSplit(); } void MarkSweep::UnBindBitmaps() { + timings_.StartSplit("UnBindBitmaps"); const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces(); // TODO: C++0x typedef std::vector<space::ContinuousSpace*>::const_iterator It; @@ -1456,6 +1506,7 @@ void MarkSweep::UnBindBitmaps() { } } } + timings_.EndSplit(); } void MarkSweep::FinishPhase() { @@ -1466,11 +1517,13 @@ void MarkSweep::FinishPhase() { heap->PostGcVerification(this); - timings_.NewSplit("GrowForUtilization"); + timings_.StartSplit("GrowForUtilization"); heap->GrowForUtilization(GetGcType(), GetDurationNs()); + timings_.EndSplit(); - timings_.NewSplit("RequestHeapTrim"); + timings_.StartSplit("RequestHeapTrim"); heap->RequestHeapTrim(); + timings_.EndSplit(); // Update the cumulative statistics total_time_ns_ += GetDurationNs(); diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index d386fd6a64..e39e2f7e8f 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -167,14 +167,6 @@ class MarkSweep : public GarbageCollector { void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor) NO_THREAD_SAFETY_ANALYSIS; - void SetFinger(mirror::Object* new_finger) { - finger_ = new_finger; - } - - void DisableFinger() { - SetFinger(reinterpret_cast<mirror::Object*>(~static_cast<uintptr_t>(0))); - } - size_t GetFreedBytes() const { return freed_bytes_; } @@ -261,13 +253,22 @@ class MarkSweep : public GarbageCollector { SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); - void MarkObjectNonNull(const mirror::Object* obj, bool check_finger) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + void MarkObjectNonNull(const 
mirror::Object* obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); + + // Unmarks an object by clearing the bit inside of the corresponding bitmap, or if it is in a + // space set, removing the object from the set. + void UnMarkObjectNonNull(const mirror::Object* obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); - void MarkObjectNonNullParallel(const mirror::Object* obj, bool check_finger); + // Marks an object atomically, safe to use from multiple threads. + void MarkObjectNonNullParallel(const mirror::Object* obj); - bool MarkLargeObject(const mirror::Object* obj) + // Marks or unmarks a large object based on whether or not set is true. If set is true, then we + // mark, otherwise we unmark. + bool MarkLargeObject(const mirror::Object* obj, bool set) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); // Returns true if we need to add obj to a mark stack. @@ -295,6 +296,11 @@ class MarkSweep : public GarbageCollector { // Expand mark stack to 2x its current size. Thread safe. void ExpandMarkStack(); + // Returns true if an object is inside of the immune region (assumed to be marked). + bool IsImmune(const mirror::Object* obj) const { + return obj >= immune_begin_ && obj < immune_end_; + } + static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg, const StackVisitor *visitor); @@ -386,8 +392,6 @@ class MarkSweep : public GarbageCollector { accounting::ObjectStack* mark_stack_; - mirror::Object* finger_; - // Immune range, every object inside the immune range is assumed to be marked. mirror::Object* immune_begin_; mirror::Object* immune_end_; diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc index 71e580d748..aad7c2973f 100644 --- a/runtime/gc/collector/sticky_mark_sweep.cc +++ b/runtime/gc/collector/sticky_mark_sweep.cc @@ -51,12 +51,10 @@ void StickyMarkSweep::BindBitmaps() { } void StickyMarkSweep::MarkReachableObjects() { - DisableFinger(); RecursiveMarkDirtyObjects(accounting::CardTable::kCardDirty - 1); } void StickyMarkSweep::Sweep(bool swap_bitmaps) { - timings_.NewSplit("SweepArray"); accounting::ObjectStack* live_stack = GetHeap()->GetLiveStack(); SweepArray(live_stack, false); } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index a9e5b08508..6dcdab9b60 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -21,6 +21,7 @@ #include <limits> #include <vector> +#include <valgrind.h> #include "base/stl_util.h" #include "common_throws.h" @@ -34,6 +35,7 @@ #include "gc/collector/mark_sweep-inl.h" #include "gc/collector/partial_mark_sweep.h" #include "gc/collector/sticky_mark_sweep.h" +#include "gc/space/dlmalloc_space-inl.h" #include "gc/space/image_space.h" #include "gc/space/large_object_space.h" #include "gc/space/space-inl.h" @@ -66,6 +68,8 @@ static const bool kDumpGcPerformanceOnShutdown = false; // Minimum amount of remaining bytes before a concurrent GC is triggered. static const size_t kMinConcurrentRemainingBytes = 128 * KB; const double Heap::kDefaultTargetUtilization = 0.5; +// If true, measure the total allocation time. 
+static const bool kMeasureAllocationTime = false; Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free, double target_utilization, size_t capacity, const std::string& original_image_file_name, @@ -126,9 +130,9 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max max_free_(max_free), target_utilization_(target_utilization), total_wait_time_(0), - measure_allocation_time_(false), total_allocation_time_(0), - verify_object_mode_(kHeapVerificationNotPermitted) { + verify_object_mode_(kHeapVerificationNotPermitted), + running_on_valgrind_(RUNNING_ON_VALGRIND) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; } @@ -154,8 +158,16 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max } } + alloc_space_ = space::DlMallocSpace::Create(Runtime::Current()->IsZygote() ? "zygote space" : "alloc space", + initial_size, + growth_limit, capacity, + requested_alloc_space_begin); + CHECK(alloc_space_ != NULL) << "Failed to create alloc space"; + alloc_space_->SetFootprintLimit(alloc_space_->Capacity()); + AddContinuousSpace(alloc_space_); + // Allocate the large object space. - const bool kUseFreeListSpaceForLOS = false; + const bool kUseFreeListSpaceForLOS = false; if (kUseFreeListSpaceForLOS) { large_object_space_ = space::FreeListSpace::Create("large object space", NULL, capacity); } else { @@ -164,14 +176,6 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max CHECK(large_object_space_ != NULL) << "Failed to create large object space"; AddDiscontinuousSpace(large_object_space_); - alloc_space_ = space::DlMallocSpace::Create(Runtime::Current()->IsZygote() ? "zygote space" : "alloc space", - initial_size, - growth_limit, capacity, - requested_alloc_space_begin); - CHECK(alloc_space_ != NULL) << "Failed to create alloc space"; - alloc_space_->SetFootprintLimit(alloc_space_->Capacity()); - AddContinuousSpace(alloc_space_); - // Compute heap capacity. Continuous spaces are sorted in order of Begin(). byte* heap_begin = continuous_spaces_.front()->Begin(); size_t heap_capacity = continuous_spaces_.back()->End() - continuous_spaces_.front()->Begin(); @@ -469,7 +473,7 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { } os << "Total number of allocations: " << total_objects_allocated << "\n"; os << "Total bytes allocated " << PrettySize(total_bytes_allocated) << "\n"; - if (measure_allocation_time_) { + if (kMeasureAllocationTime) { os << "Total time spent allocating: " << PrettyDuration(allocation_time) << "\n"; os << "Mean allocation time: " << PrettyDuration(allocation_time / total_objects_allocated) << "\n"; @@ -566,32 +570,29 @@ mirror::Object* Heap::AllocObject(Thread* self, mirror::Class* c, size_t byte_co DCHECK_GE(byte_count, sizeof(mirror::Object)); mirror::Object* obj = NULL; - size_t size = 0; + size_t bytes_allocated = 0; uint64_t allocation_start = 0; - if (UNLIKELY(measure_allocation_time_)) { + if (UNLIKELY(kMeasureAllocationTime)) { allocation_start = NanoTime() / kTimeAdjust; } // We need to have a zygote space or else our newly allocated large object can end up in the // Zygote resulting in it being prematurely freed. - // We can only do this for primive objects since large objects will not be within the card table + // We can only do this for primitive objects since large objects will not be within the card table // range. This also means that we rely on SetClass not dirtying the object's card. 
bool large_object_allocation = byte_count >= large_object_threshold_ && have_zygote_space_ && c->IsPrimitiveArray(); if (UNLIKELY(large_object_allocation)) { - size = RoundUp(byte_count, kPageSize); - obj = Allocate(self, large_object_space_, size); + obj = Allocate(self, large_object_space_, byte_count, &bytes_allocated); // Make sure that our large object didn't get placed anywhere within the space interval or else // it breaks the immune range. DCHECK(obj == NULL || reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() || reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End()); } else { - obj = Allocate(self, alloc_space_, byte_count); - + obj = Allocate(self, alloc_space_, byte_count, &bytes_allocated); // Ensure that we did not allocate into a zygote space. DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace()); - size = alloc_space_->AllocationSize(obj); } if (LIKELY(obj != NULL)) { @@ -599,19 +600,21 @@ mirror::Object* Heap::AllocObject(Thread* self, mirror::Class* c, size_t byte_co // Record allocation after since we want to use the atomic add for the atomic fence to guard // the SetClass since we do not want the class to appear NULL in another thread. - RecordAllocation(size, obj); + RecordAllocation(bytes_allocated, obj); if (Dbg::IsAllocTrackingEnabled()) { Dbg::RecordAllocation(c, byte_count); } - if (static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_) { + if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_)) { // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint. SirtRef<mirror::Object> ref(self, obj); RequestConcurrentGC(self); } - VerifyObject(obj); + if (kDesiredHeapVerification > kNoHeapVerification) { + VerifyObject(obj); + } - if (UNLIKELY(measure_allocation_time_)) { + if (UNLIKELY(kMeasureAllocationTime)) { total_allocation_time_.fetch_add(NanoTime() / kTimeAdjust - allocation_start); } @@ -767,7 +770,7 @@ void Heap::VerifyHeap() { GetLiveBitmap()->Walk(Heap::VerificationCallback, this); } -void Heap::RecordAllocation(size_t size, mirror::Object* obj) { +inline void Heap::RecordAllocation(size_t size, mirror::Object* obj) { DCHECK(obj != NULL); DCHECK_GT(size, 0u); num_bytes_allocated_.fetch_add(size); @@ -806,37 +809,55 @@ void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) { } } -mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, - bool grow) { - // Should we try to use a CAS here and fix up num_bytes_allocated_ later with AllocationSize? - if (num_bytes_allocated_ + alloc_size > max_allowed_footprint_) { - // max_allowed_footprint_ <= growth_limit_ so it is safe to check in here. - if (num_bytes_allocated_ + alloc_size > growth_limit_) { - // Completely out of memory. - return NULL; - } +inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size) { + return num_bytes_allocated_ + alloc_size > growth_limit_; +} + +inline mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, + bool grow, size_t* bytes_allocated) { + if (IsOutOfMemoryOnAllocation(alloc_size)) { + return NULL; } + return space->Alloc(self, alloc_size, bytes_allocated); +} - return space->Alloc(self, alloc_size); +// DlMallocSpace-specific version. 
+inline mirror::Object* Heap::TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size, + bool grow, size_t* bytes_allocated) { + if (IsOutOfMemoryOnAllocation(alloc_size)) { + return NULL; + } + if (!running_on_valgrind_) { + return space->AllocNonvirtual(self, alloc_size, bytes_allocated); + } else { + return space->Alloc(self, alloc_size, bytes_allocated); + } } -mirror::Object* Heap::Allocate(Thread* self, space::AllocSpace* space, size_t alloc_size) { +template <class T> +inline mirror::Object* Heap::Allocate(Thread* self, T* space, size_t alloc_size, size_t* bytes_allocated) { // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are // done in the runnable state where suspension is expected. DCHECK_EQ(self->GetState(), kRunnable); self->AssertThreadSuspensionIsAllowable(); - mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false); + mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated); if (ptr != NULL) { return ptr; } + return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated); +} + +mirror::Object* Heap::AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t alloc_size, + size_t* bytes_allocated) { + mirror::Object* ptr; // The allocation failed. If the GC is running, block until it completes, and then retry the // allocation. collector::GcType last_gc = WaitForConcurrentGcToComplete(self); if (last_gc != collector::kGcTypeNone) { // A GC was in progress and we blocked, retry allocation now that memory has been freed. - ptr = TryToAllocate(self, space, alloc_size, false); + ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated); if (ptr != NULL) { return ptr; } @@ -871,7 +892,7 @@ mirror::Object* Heap::Allocate(Thread* self, space::AllocSpace* space, size_t al i = static_cast<size_t>(gc_type_ran); // Did we free sufficient memory for the allocation to succeed? - ptr = TryToAllocate(self, space, alloc_size, false); + ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated); if (ptr != NULL) { return ptr; } @@ -880,7 +901,7 @@ mirror::Object* Heap::Allocate(Thread* self, space::AllocSpace* space, size_t al // Allocations have failed after GCs; this is an exceptional state. // Try harder, growing the heap if necessary. - ptr = TryToAllocate(self, space, alloc_size, true); + ptr = TryToAllocate(self, space, alloc_size, true, bytes_allocated); if (ptr != NULL) { return ptr; } @@ -895,7 +916,7 @@ mirror::Object* Heap::Allocate(Thread* self, space::AllocSpace* space, size_t al // We don't need a WaitForConcurrentGcToComplete here either. 
CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true); - return TryToAllocate(self, space, alloc_size, true); + return TryToAllocate(self, space, alloc_size, true, bytes_allocated); } void Heap::SetTargetHeapUtilization(float target) { @@ -1150,35 +1171,10 @@ void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetM } } -void Heap::UnMarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects, - accounting::ObjectStack* stack) { - mirror::Object** limit = stack->End(); - for (mirror::Object** it = stack->Begin(); it != limit; ++it) { - const mirror::Object* obj = *it; - DCHECK(obj != NULL); - if (LIKELY(bitmap->HasAddress(obj))) { - bitmap->Clear(obj); - } else { - large_objects->Clear(obj); - } - } -} - collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause, bool clear_soft_references) { Thread* self = Thread::Current(); - switch (gc_cause) { - case kGcCauseForAlloc: - ATRACE_BEGIN("GC (alloc)"); - break; - case kGcCauseBackground: - ATRACE_BEGIN("GC (background)"); - break; - case kGcCauseExplicit: - ATRACE_BEGIN("GC (explicit)"); - break; - } ScopedThreadStateChange tsc(self, kWaitingPerformingGc); Locks::mutator_lock_->AssertNotHeld(self); @@ -1198,7 +1194,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus } } if (!start_collect) { + // TODO: timinglog this. WaitForConcurrentGcToComplete(self); + // TODO: if another thread beat this one to do the GC, perhaps we should just return here? // Not doing at the moment to ensure soft references are cleared. } @@ -1227,6 +1225,18 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus gc_type = collector::kGcTypePartial; } + switch (gc_cause) { + case kGcCauseForAlloc: + ATRACE_BEGIN("GC (alloc)"); + break; + case kGcCauseBackground: + ATRACE_BEGIN("GC (background)"); + break; + case kGcCauseExplicit: + ATRACE_BEGIN("GC (explicit)"); + break; + } + DCHECK_LT(gc_type, collector::kGcTypeMax); DCHECK_NE(gc_type, collector::kGcTypeNone); collector::MarkSweep* collector = NULL; @@ -1242,6 +1252,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus CHECK(collector != NULL) << "Could not find garbage collector with concurrent=" << concurrent_gc_ << " and type=" << gc_type; + + base::TimingLogger& timings = collector->GetTimings(); + collector->clear_soft_references_ = clear_soft_references; collector->Run(); total_objects_freed_ever_ += collector->GetFreedObjects(); @@ -1250,42 +1263,43 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCaus const size_t duration = collector->GetDurationNs(); std::vector<uint64_t> pauses = collector->GetPauseTimes(); bool was_slow = duration > kSlowGcThreshold || - (gc_cause == kGcCauseForAlloc && duration > kLongGcPauseThreshold); + (gc_cause == kGcCauseForAlloc && duration > kLongGcPauseThreshold); for (size_t i = 0; i < pauses.size(); ++i) { - if (pauses[i] > kLongGcPauseThreshold) { - was_slow = true; - } + if (pauses[i] > kLongGcPauseThreshold) { + was_slow = true; + } } if (was_slow) { - const size_t percent_free = GetPercentFree(); - const size_t current_heap_size = GetBytesAllocated(); - const size_t total_memory = GetTotalMemory(); - std::ostringstream pause_string; - for (size_t i = 0; i < pauses.size(); ++i) { - pause_string << PrettyDuration((pauses[i] / 1000) * 1000) - << ((i != pauses.size() - 1) ? 
", " : ""); - } - LOG(INFO) << gc_cause << " " << collector->GetName() - << "GC freed " << PrettySize(collector->GetFreedBytes()) << ", " - << percent_free << "% free, " << PrettySize(current_heap_size) << "/" - << PrettySize(total_memory) << ", " << "paused " << pause_string.str() - << " total " << PrettyDuration((duration / 1000) * 1000); - if (VLOG_IS_ON(heap)) { - LOG(INFO) << Dumpable<base::TimingLogger>(collector->GetTimings()); - } + const size_t percent_free = GetPercentFree(); + const size_t current_heap_size = GetBytesAllocated(); + const size_t total_memory = GetTotalMemory(); + std::ostringstream pause_string; + for (size_t i = 0; i < pauses.size(); ++i) { + pause_string << PrettyDuration((pauses[i] / 1000) * 1000) + << ((i != pauses.size() - 1) ? ", " : ""); + } + LOG(INFO) << gc_cause << " " << collector->GetName() + << "GC freed " << PrettySize(collector->GetFreedBytes()) << ", " + << percent_free << "% free, " << PrettySize(current_heap_size) << "/" + << PrettySize(total_memory) << ", " << "paused " << pause_string.str() + << " total " << PrettyDuration((duration / 1000) * 1000); + if (VLOG_IS_ON(heap)) { + LOG(INFO) << Dumpable<base::TimingLogger>(timings); + } } { - MutexLock mu(self, *gc_complete_lock_); - is_gc_running_ = false; - last_gc_type_ = gc_type; - // Wake anyone who may have been waiting for the GC to complete. - gc_complete_cond_->Broadcast(self); + MutexLock mu(self, *gc_complete_lock_); + is_gc_running_ = false; + last_gc_type_ = gc_type; + // Wake anyone who may have been waiting for the GC to complete. + gc_complete_cond_->Broadcast(self); } - // Inform DDMS that a GC completed. ATRACE_END(); + + // Inform DDMS that a GC completed. Dbg::GcDidFinish(); return gc_type; } @@ -1298,9 +1312,10 @@ void Heap::UpdateAndMarkModUnion(collector::MarkSweep* mark_sweep, base::TimingL return; } + base::TimingLogger::ScopedSplit split("UpdateModUnionTable", &timings); // Update zygote mod union table. if (gc_type == collector::kGcTypePartial) { - timings.NewSplit("UpdateZygoteModUnionTable"); + base::TimingLogger::ScopedSplit split("UpdateZygoteModUnionTable", &timings); zygote_mod_union_table_->Update(); timings.NewSplit("ZygoteMarkReferences"); @@ -1380,8 +1395,7 @@ class VerifyReferenceVisitor { ScanVisitor scan_visitor; byte* byte_cover_begin = reinterpret_cast<byte*>(card_table->AddrFromCard(card_addr)); card_table->Scan(bitmap, byte_cover_begin, - byte_cover_begin + accounting::CardTable::kCardSize, - scan_visitor, VoidFunctor()); + byte_cover_begin + accounting::CardTable::kCardSize, scan_visitor); // Search to see if any of the roots reference our object. void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj)); @@ -1588,15 +1602,15 @@ void Heap::ProcessCards(base::TimingLogger& timings) { for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) { space::ContinuousSpace* space = *it; if (space->IsImageSpace()) { - timings.NewSplit("ModUnionClearCards"); + base::TimingLogger::ScopedSplit split("ImageModUnionClearCards", &timings); image_mod_union_table_->ClearCards(space); } else if (space->IsZygoteSpace()) { - timings.NewSplit("ZygoteModUnionClearCards"); + base::TimingLogger::ScopedSplit split("ZygoteModUnionClearCards", &timings); zygote_mod_union_table_->ClearCards(space); } else { + base::TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings); // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards // were dirty before the GC started. 
- timings.NewSplit("AllocSpaceClearCards"); card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor()); } } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index c1cff43540..b3346e869f 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -360,11 +360,6 @@ class Heap { accounting::ObjectStack* stack) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); - // Unmark all the objects in the allocation stack in the specified bitmap. - void UnMarkAllocStack(accounting::SpaceBitmap* bitmap, accounting::SpaceSetMap* large_objects, - accounting::ObjectStack* stack) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); - // Update and mark mod union table based on gc type. void UpdateAndMarkModUnion(collector::MarkSweep* mark_sweep, base::TimingLogger& timings, collector::GcType gc_type) @@ -400,15 +395,31 @@ class Heap { private: // Allocates uninitialized storage. Passing in a null space tries to place the object in the // large object space. - mirror::Object* Allocate(Thread* self, space::AllocSpace* space, size_t num_bytes) + template <class T> mirror::Object* Allocate(Thread* self, T* space, size_t num_bytes, size_t* bytes_allocated) + LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Handles Allocate()'s slow allocation path with GC involved after + // an initial allocation attempt failed. + mirror::Object* AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t num_bytes, + size_t* bytes_allocated) LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Try to allocate a number of bytes, this function never does any GCs. - mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow) + mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow, + size_t* bytes_allocated) LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Try to allocate a number of bytes, this function never does any GCs. DlMallocSpace-specialized version. + mirror::Object* TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size, bool grow, + size_t* bytes_allocated) + LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + bool IsOutOfMemoryOnAllocation(size_t alloc_size); + // Pushes a list of cleared references out to the managed heap. void EnqueueClearedReferences(mirror::Object** cleared_references); @@ -636,7 +647,6 @@ class Heap { uint64_t total_wait_time_; // Total number of objects allocated in microseconds. - const bool measure_allocation_time_; AtomicInteger total_allocation_time_; // The current state of heap verification, may be enabled or disabled. @@ -644,6 +654,8 @@ class Heap { std::vector<collector::MarkSweep*> mark_sweep_collectors_; + const bool running_on_valgrind_; + friend class collector::MarkSweep; friend class VerifyReferenceCardVisitor; friend class VerifyReferenceVisitor; diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h new file mode 100644 index 0000000000..54811414e9 --- /dev/null +++ b/runtime/gc/space/dlmalloc_space-inl.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_ +#define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_ + +#include "dlmalloc_space.h" + +namespace art { +namespace gc { +namespace space { + +inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes, + size_t* bytes_allocated) { + mirror::Object* obj; + { + MutexLock mu(self, lock_); + obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated); + } + if (obj != NULL) { + // Zero freshly allocated memory, done while not holding the space's lock. + memset(obj, 0, num_bytes); + } + return obj; +} + +inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) { + mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes)); + if (result != NULL) { + if (kDebugSpaces) { + CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) + << ") not in bounds of allocation space " << *this; + } + size_t allocation_size = AllocationSizeNonvirtual(result); + DCHECK(bytes_allocated != NULL); + *bytes_allocated = allocation_size; + num_bytes_allocated_ += allocation_size; + total_bytes_allocated_ += allocation_size; + ++total_objects_allocated_; + ++num_objects_allocated_; + } + return result; +} + +} // namespace space +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_ diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index d539aa269a..8b99e96ceb 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "dlmalloc_space.h" +#include "dlmalloc_space-inl.h" #include "gc/accounting/card_table.h" #include "gc/heap.h" #include "runtime.h" @@ -46,8 +47,9 @@ const size_t kValgrindRedZoneBytes = 8; // A specialization of DlMallocSpace that provides information to valgrind wrt allocations. 
class ValgrindDlMallocSpace : public DlMallocSpace { public: - virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes) { - void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes); + virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) { + void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes, + bytes_allocated); if (obj_with_rdz == NULL) { return NULL; } @@ -59,8 +61,9 @@ class ValgrindDlMallocSpace : public DlMallocSpace { return result; } - virtual mirror::Object* Alloc(Thread* self, size_t num_bytes) { - void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes); + virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) { + void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes, + bytes_allocated); if (obj_with_rdz == NULL) { return NULL; } @@ -234,37 +237,27 @@ void DlMallocSpace::SwapBitmaps() { mark_bitmap_->SetName(temp_name); } -mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes) { - mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_calloc(mspace_, 1, num_bytes)); - if (result != NULL) { - if (kDebugSpaces) { - CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) - << ") not in bounds of allocation space " << *this; - } - size_t allocation_size = InternalAllocationSize(result); - num_bytes_allocated_ += allocation_size; - total_bytes_allocated_ += allocation_size; - ++total_objects_allocated_; - ++num_objects_allocated_; - } - return result; -} - -mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes) { - MutexLock mu(self, lock_); - return AllocWithoutGrowthLocked(num_bytes); +mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) { + return AllocNonvirtual(self, num_bytes, bytes_allocated); } -mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes) { - MutexLock mu(self, lock_); - // Grow as much as possible within the mspace. - size_t max_allowed = Capacity(); - mspace_set_footprint_limit(mspace_, max_allowed); - // Try the allocation. - mirror::Object* result = AllocWithoutGrowthLocked(num_bytes); - // Shrink back down as small as possible. - size_t footprint = mspace_footprint(mspace_); - mspace_set_footprint_limit(mspace_, footprint); +mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) { + mirror::Object* result; + { + MutexLock mu(self, lock_); + // Grow as much as possible within the mspace. + size_t max_allowed = Capacity(); + mspace_set_footprint_limit(mspace_, max_allowed); + // Try the allocation. + result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated); + // Shrink back down as small as possible. + size_t footprint = mspace_footprint(mspace_); + mspace_set_footprint_limit(mspace_, footprint); + } + if (result != NULL) { + // Zero freshly allocated memory, done while not holding the space's lock. + memset(result, 0, num_bytes); + } // Return the new allocation or NULL. CHECK(!kDebugSpaces || result == NULL || Contains(result)); return result; @@ -415,8 +408,7 @@ void* DlMallocSpace::MoreCore(intptr_t increment) { // Virtual functions can't get inlined. 
inline size_t DlMallocSpace::InternalAllocationSize(const mirror::Object* obj) { - return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) + - kChunkOverhead; + return AllocationSizeNonvirtual(obj); } size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) { diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h index c15d0babcc..6d52c269e6 100644 --- a/runtime/gc/space/dlmalloc_space.h +++ b/runtime/gc/space/dlmalloc_space.h @@ -50,16 +50,24 @@ class DlMallocSpace : public MemMapSpace, public AllocSpace { size_t capacity, byte* requested_begin); // Allocate num_bytes without allowing the underlying mspace to grow. - virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes); + virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, + size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); // Allocate num_bytes allowing the underlying mspace to grow. - virtual mirror::Object* Alloc(Thread* self, size_t num_bytes); + virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated); // Return the storage space required by obj. virtual size_t AllocationSize(const mirror::Object* obj); virtual size_t Free(Thread* self, mirror::Object* ptr); virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs); + mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated); + + size_t AllocationSizeNonvirtual(const mirror::Object* obj) { + return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) + + kChunkOverhead; + } + void* MoreCore(intptr_t increment); void* GetMspace() const { @@ -71,7 +79,7 @@ class DlMallocSpace : public MemMapSpace, public AllocSpace { // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be // in use, indicated by num_bytes equaling zero. - void Walk(WalkCallback callback, void* arg); + void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_); // Returns the number of bytes that the space has currently obtained from the system. This is // greater or equal to the amount of live data in the space. 
@@ -141,7 +149,8 @@ class DlMallocSpace : public MemMapSpace, public AllocSpace { private: size_t InternalAllocationSize(const mirror::Object* obj); - mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) + EXCLUSIVE_LOCKS_REQUIRED(lock_); bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base); diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index d7db561f61..a174c0a1dc 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -55,7 +55,7 @@ LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) { return new LargeObjectMapSpace(name); } -mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes) { +mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) { MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", NULL, num_bytes, PROT_READ | PROT_WRITE); if (mem_map == NULL) { @@ -66,6 +66,8 @@ mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes) { large_objects_.push_back(obj); mem_maps_.Put(obj, mem_map); size_t allocation_size = mem_map->Size(); + DCHECK(bytes_allocated != NULL); + *bytes_allocated = allocation_size; num_bytes_allocated_ += allocation_size; total_bytes_allocated_ += allocation_size; ++num_objects_allocated_; @@ -138,89 +140,97 @@ FreeListSpace::FreeListSpace(const std::string& name, MemMap* mem_map, byte* beg end_(end), mem_map_(mem_map), lock_("free list space lock", kAllocSpaceLock) { - chunks_.resize(Size() / kAlignment + 1); - // Add a dummy chunk so we don't need to handle chunks having no next chunk. - chunks_.back().SetSize(kAlignment, false); - // Start out with one large free chunk. 
- AddFreeChunk(begin_, end_ - begin_, NULL); + free_end_ = end - begin; } FreeListSpace::~FreeListSpace() {} -void FreeListSpace::AddFreeChunk(void* address, size_t size, Chunk* previous) { - Chunk* chunk = ChunkFromAddr(address); - chunk->SetSize(size, true); - chunk->SetPrevious(previous); - Chunk* next_chunk = GetNextChunk(chunk); - next_chunk->SetPrevious(chunk); - free_chunks_.insert(chunk); +void FreeListSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) { + MutexLock mu(Thread::Current(), lock_); + uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_; + AllocationHeader* cur_header = reinterpret_cast<AllocationHeader*>(Begin()); + while (reinterpret_cast<uintptr_t>(cur_header) < free_end_start) { + cur_header = cur_header->GetNextNonFree(); + size_t alloc_size = cur_header->AllocationSize(); + byte* byte_start = reinterpret_cast<byte*>(cur_header->GetObjectAddress()); + byte* byte_end = byte_start + alloc_size - sizeof(AllocationHeader); + callback(byte_start, byte_end, alloc_size, arg); + callback(NULL, NULL, 0, arg); + cur_header = reinterpret_cast<AllocationHeader*>(byte_end); + } } -FreeListSpace::Chunk* FreeListSpace::ChunkFromAddr(void* address) { - size_t offset = reinterpret_cast<byte*>(address) - Begin(); - DCHECK(IsAligned<kAlignment>(offset)); - DCHECK_LT(offset, Size()); - return &chunks_[offset / kAlignment]; +void FreeListSpace::RemoveFreePrev(AllocationHeader* header) { + CHECK(!header->IsFree()); + CHECK_GT(header->GetPrevFree(), size_t(0)); + FreeBlocks::iterator found = free_blocks_.lower_bound(header); + CHECK(found != free_blocks_.end()); + CHECK_EQ(*found, header); + free_blocks_.erase(found); } -void* FreeListSpace::AddrFromChunk(Chunk* chunk) { - return reinterpret_cast<void*>(Begin() + (chunk - &chunks_.front()) * kAlignment); +FreeListSpace::AllocationHeader* FreeListSpace::GetAllocationHeader(const mirror::Object* obj) { + DCHECK(Contains(obj)); + return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(obj) - + sizeof(AllocationHeader)); } -void FreeListSpace::RemoveFreeChunk(Chunk* chunk) { - // TODO: C++0x - // TODO: Improve performance, this might be slow. - std::pair<FreeChunks::iterator, FreeChunks::iterator> range = free_chunks_.equal_range(chunk); - for (FreeChunks::iterator it = range.first; it != range.second; ++it) { - if (*it == chunk) { - free_chunks_.erase(it); - return; - } - } -} - -void FreeListSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) { - MutexLock mu(Thread::Current(), lock_); - for (Chunk* chunk = &chunks_.front(); chunk < &chunks_.back(); ) { - if (!chunk->IsFree()) { - size_t size = chunk->GetSize(); - void* begin = AddrFromChunk(chunk); - void* end = reinterpret_cast<void*>(reinterpret_cast<byte*>(begin) + size); - callback(begin, end, size, arg); - callback(NULL, NULL, 0, arg); - } - chunk = GetNextChunk(chunk); +FreeListSpace::AllocationHeader* FreeListSpace::AllocationHeader::GetNextNonFree() { + // We know that there has to be at least one object after us or else we would have + // coalesced with the free end region. May be worth investigating a better way to do this + // as it may be expensive for large allocations. + for (uintptr_t pos = reinterpret_cast<uintptr_t>(this);; pos += kAlignment) { + AllocationHeader* cur = reinterpret_cast<AllocationHeader*>(pos); + if (!cur->IsFree()) return cur; } } size_t FreeListSpace::Free(Thread* self, mirror::Object* obj) { MutexLock mu(self, lock_); - CHECK(Contains(obj)); - // Check adjacent chunks to see if we need to combine. 
- Chunk* chunk = ChunkFromAddr(obj); - CHECK(!chunk->IsFree()); - - size_t allocation_size = chunk->GetSize(); - if (kIsDebugBuild) { - memset(obj, 0xEB, allocation_size); - } - madvise(obj, allocation_size, MADV_DONTNEED); - num_objects_allocated_--; - num_bytes_allocated_ -= allocation_size; - Chunk* prev = chunk->GetPrevious(); - Chunk* next = GetNextChunk(chunk); - - // Combine any adjacent free chunks - size_t extra_size = chunk->GetSize(); - if (next->IsFree()) { - extra_size += next->GetSize(); - RemoveFreeChunk(next); + DCHECK(Contains(obj)); + AllocationHeader* header = GetAllocationHeader(obj); + CHECK(IsAligned<kAlignment>(header)); + size_t allocation_size = header->AllocationSize(); + DCHECK_GT(allocation_size, size_t(0)); + DCHECK(IsAligned<kAlignment>(allocation_size)); + // Look at the next chunk. + AllocationHeader* next_header = header->GetNextAllocationHeader(); + // Calculate the start of the end free block. + uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_; + size_t header_prev_free = header->GetPrevFree(); + size_t new_free_size = allocation_size; + if (header_prev_free) { + new_free_size += header_prev_free; + RemoveFreePrev(header); } - if (prev != NULL && prev->IsFree()) { - RemoveFreeChunk(prev); - AddFreeChunk(AddrFromChunk(prev), prev->GetSize() + extra_size, prev->GetPrevious()); + if (reinterpret_cast<uintptr_t>(next_header) >= free_end_start) { + // Easy case, the next chunk is the end free region. + CHECK_EQ(reinterpret_cast<uintptr_t>(next_header), free_end_start); + free_end_ += new_free_size; } else { - AddFreeChunk(AddrFromChunk(chunk), extra_size, prev); + AllocationHeader* new_free_header; + DCHECK(IsAligned<kAlignment>(next_header)); + if (next_header->IsFree()) { + // Find the next chunk by reading each page until we hit one with non-zero chunk. + AllocationHeader* next_next_header = next_header->GetNextNonFree(); + DCHECK(IsAligned<kAlignment>(next_next_header)); + DCHECK(IsAligned<kAlignment>(next_next_header->AllocationSize())); + RemoveFreePrev(next_next_header); + new_free_header = next_next_header; + new_free_size += next_next_header->GetPrevFree(); + } else { + new_free_header = next_header; + } + new_free_header->prev_free_ = new_free_size; + free_blocks_.insert(new_free_header); + } + --num_objects_allocated_; + DCHECK_LE(allocation_size, num_bytes_allocated_); + num_bytes_allocated_ -= allocation_size; + madvise(header, allocation_size, MADV_DONTNEED); + if (kIsDebugBuild) { + // Can't disallow reads since we use them to find next chunks during coalescing. 
+ mprotect(header, allocation_size, PROT_READ); } return allocation_size; } @@ -229,50 +239,91 @@ bool FreeListSpace::Contains(const mirror::Object* obj) const { return mem_map_->HasAddress(obj); } -FreeListSpace::Chunk* FreeListSpace::GetNextChunk(Chunk* chunk) { - return chunk + chunk->GetSize() / kAlignment; -} - size_t FreeListSpace::AllocationSize(const mirror::Object* obj) { - Chunk* chunk = ChunkFromAddr(const_cast<mirror::Object*>(obj)); - CHECK(!chunk->IsFree()); - return chunk->GetSize(); + AllocationHeader* header = GetAllocationHeader(obj); + DCHECK(Contains(obj)); + DCHECK(!header->IsFree()); + return header->AllocationSize(); } -mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes) { +mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) { MutexLock mu(self, lock_); - num_bytes = RoundUp(num_bytes, kAlignment); - Chunk temp; - temp.SetSize(num_bytes); + size_t allocation_size = RoundUp(num_bytes + sizeof(AllocationHeader), kAlignment); + AllocationHeader temp; + temp.SetPrevFree(allocation_size); + temp.SetAllocationSize(0); + AllocationHeader* new_header; // Find the smallest chunk at least num_bytes in size. - FreeChunks::iterator found = free_chunks_.lower_bound(&temp); - if (found == free_chunks_.end()) { - // Out of memory, or too much fragmentation. - return NULL; - } - Chunk* chunk = *found; - free_chunks_.erase(found); - CHECK(chunk->IsFree()); - void* addr = AddrFromChunk(chunk); - size_t chunk_size = chunk->GetSize(); - chunk->SetSize(num_bytes); - if (chunk_size > num_bytes) { - // Split the chunk into two chunks. - Chunk* new_chunk = GetNextChunk(chunk); - AddFreeChunk(AddrFromChunk(new_chunk), chunk_size - num_bytes, chunk); + FreeBlocks::iterator found = free_blocks_.lower_bound(&temp); + if (found != free_blocks_.end()) { + AllocationHeader* header = *found; + free_blocks_.erase(found); + + // Fit our object in the previous free header space. + new_header = header->GetPrevFreeAllocationHeader(); + + // Remove the newly allocated block from the header and update the prev_free_. + header->prev_free_ -= allocation_size; + if (header->prev_free_ > 0) { + // If there is remaining space, insert back into the free set. + free_blocks_.insert(header); + } + } else { + // Try to steal some memory from the free space at the end of the space. + if (LIKELY(free_end_ >= allocation_size)) { + // Fit our object at the start of the end free block. + new_header = reinterpret_cast<AllocationHeader*>(end_ - free_end_); + free_end_ -= allocation_size; + } else { + return NULL; + } } - num_objects_allocated_++; - total_objects_allocated_++; - num_bytes_allocated_ += num_bytes; - total_bytes_allocated_ += num_bytes; - return reinterpret_cast<mirror::Object*>(addr); + DCHECK(bytes_allocated != NULL); + *bytes_allocated = allocation_size; + + // Need to do these inside of the lock. + ++num_objects_allocated_; + ++total_objects_allocated_; + num_bytes_allocated_ += allocation_size; + total_bytes_allocated_ += allocation_size; + + // We always put our object at the start of the free block, there can not be another free block + // before it. 
+ if (kIsDebugBuild) { + mprotect(new_header, allocation_size, PROT_READ | PROT_WRITE); + } + new_header->SetPrevFree(0); + new_header->SetAllocationSize(allocation_size); + return new_header->GetObjectAddress(); } void FreeListSpace::Dump(std::ostream& os) const { + MutexLock mu(Thread::Current(), const_cast<Mutex&>(lock_)); os << GetName() << " -" << " begin: " << reinterpret_cast<void*>(Begin()) - << " end: " << reinterpret_cast<void*>(End()); + << " end: " << reinterpret_cast<void*>(End()) << "\n"; + uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_; + AllocationHeader* cur_header = reinterpret_cast<AllocationHeader*>(Begin()); + while (reinterpret_cast<uintptr_t>(cur_header) < free_end_start) { + byte* free_start = reinterpret_cast<byte*>(cur_header); + cur_header = cur_header->GetNextNonFree(); + byte* free_end = reinterpret_cast<byte*>(cur_header); + if (free_start != free_end) { + os << "Free block at address: " << reinterpret_cast<const void*>(free_start) + << " of length " << free_end - free_start << " bytes\n"; + } + size_t alloc_size = cur_header->AllocationSize(); + byte* byte_start = reinterpret_cast<byte*>(cur_header->GetObjectAddress()); + byte* byte_end = byte_start + alloc_size - sizeof(AllocationHeader); + os << "Large object at address: " << reinterpret_cast<const void*>(free_start) + << " of length " << byte_end - byte_start << " bytes\n"; + cur_header = reinterpret_cast<AllocationHeader*>(byte_end); + } + if (free_end_) { + os << "Free block at address: " << reinterpret_cast<const void*>(free_end_start) + << " of length " << free_end_ << " bytes\n"; + } } } // namespace space diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h index 8cd50880db..a703e86ea1 100644 --- a/runtime/gc/space/large_object_space.h +++ b/runtime/gc/space/large_object_space.h @@ -83,9 +83,9 @@ class LargeObjectMapSpace : public LargeObjectSpace { // Return the storage space required by obj. size_t AllocationSize(const mirror::Object* obj); - mirror::Object* Alloc(Thread* self, size_t num_bytes); + mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated); size_t Free(Thread* self, mirror::Object* ptr); - void Walk(DlMallocSpace::WalkCallback, void* arg); + void Walk(DlMallocSpace::WalkCallback, void* arg) LOCKS_EXCLUDED(lock_); // TODO: disabling thread safety analysis as this may be called when we already hold lock_. bool Contains(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS; @@ -103,17 +103,16 @@ class LargeObjectMapSpace : public LargeObjectSpace { }; // A continuous large object space with a free-list to handle holes. -// TODO: this implementation is buggy. class FreeListSpace : public LargeObjectSpace { public: virtual ~FreeListSpace(); static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity); size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_); - mirror::Object* Alloc(Thread* self, size_t num_bytes); + mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated); size_t Free(Thread* self, mirror::Object* obj); bool Contains(const mirror::Object* obj) const; - void Walk(DlMallocSpace::WalkCallback callback, void* arg); + void Walk(DlMallocSpace::WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_); // Address at which the space begins. 
byte* Begin() const { @@ -135,57 +134,100 @@ class FreeListSpace : public LargeObjectSpace { private: static const size_t kAlignment = kPageSize; - class Chunk { + class AllocationHeader { public: - static const size_t kFreeFlag = 0x80000000; + // Returns the allocation size, includes the header. + size_t AllocationSize() const { + return alloc_size_; + } - struct SortBySize { - bool operator()(const Chunk* a, const Chunk* b) const { - return a->GetSize() < b->GetSize(); - } - }; + // Updates the allocation size in the header, the allocation size includes the header itself. + void SetAllocationSize(size_t size) { + DCHECK(IsAligned<kPageSize>(size)); + alloc_size_ = size; + } bool IsFree() const { - return (m_size & kFreeFlag) != 0; + return AllocationSize() == 0; } - void SetSize(size_t size, bool is_free = false) { - m_size = size | (is_free ? kFreeFlag : 0); + // Returns the previous free allocation header by using the prev_free_ member to figure out + // where it is. If prev free is 0 then we just return ourself. + AllocationHeader* GetPrevFreeAllocationHeader() { + return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(this) - prev_free_); } - size_t GetSize() const { - return m_size & (kFreeFlag - 1); + // Returns the address of the object associated with this allocation header. + mirror::Object* GetObjectAddress() { + return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(this) + sizeof(*this)); } - Chunk* GetPrevious() { - return m_previous; + // Returns the next allocation header after the object associated with this allocation header. + AllocationHeader* GetNextAllocationHeader() { + DCHECK_NE(alloc_size_, 0U); + return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(this) + alloc_size_); } - void SetPrevious(Chunk* previous) { - m_previous = previous; - DCHECK(m_previous == NULL || - (m_previous != NULL && m_previous + m_previous->GetSize() / kAlignment == this)); + // Returns how many free bytes there is before the block. + size_t GetPrevFree() const { + return prev_free_; } + // Update the size of the free block prior to the allocation. + void SetPrevFree(size_t prev_free) { + DCHECK(IsAligned<kPageSize>(prev_free)); + prev_free_ = prev_free; + } + + // Finds and returns the next non free allocation header after ourself. + // TODO: Optimize, currently O(n) for n free following pages. + AllocationHeader* GetNextNonFree(); + + // Used to implement best fit object allocation. Each allocation has an AllocationHeader which + // contains the size of the previous free block preceding it. Implemented in such a way that we + // can also find the iterator for any allocation header pointer. + class SortByPrevFree { + public: + bool operator()(const AllocationHeader* a, const AllocationHeader* b) const { + if (a->GetPrevFree() < b->GetPrevFree()) return true; + if (a->GetPrevFree() > b->GetPrevFree()) return false; + if (a->AllocationSize() < b->AllocationSize()) return true; + if (a->AllocationSize() > b->AllocationSize()) return false; + return reinterpret_cast<uintptr_t>(a) < reinterpret_cast<uintptr_t>(b); + } + }; + private: - size_t m_size; - Chunk* m_previous; + // Contains the size of the previous free block, if 0 then the memory preceding us is an + // allocation. + size_t prev_free_; + + // Allocation size of this object, 0 means that the allocation header is free memory. 
+ size_t alloc_size_; + + friend class FreeListSpace; }; FreeListSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end); - void AddFreeChunk(void* address, size_t size, Chunk* previous) EXCLUSIVE_LOCKS_REQUIRED(lock_); - Chunk* ChunkFromAddr(void* address) EXCLUSIVE_LOCKS_REQUIRED(lock_); - void* AddrFromChunk(Chunk* chunk) EXCLUSIVE_LOCKS_REQUIRED(lock_); - void RemoveFreeChunk(Chunk* chunk) EXCLUSIVE_LOCKS_REQUIRED(lock_); - Chunk* GetNextChunk(Chunk* chunk); - typedef std::multiset<Chunk*, Chunk::SortBySize> FreeChunks; + // Removes header from the free blocks set by finding the corresponding iterator and erasing it. + void RemoveFreePrev(AllocationHeader* header) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Finds the allocation header corresponding to obj. + AllocationHeader* GetAllocationHeader(const mirror::Object* obj); + + typedef std::set<AllocationHeader*, AllocationHeader::SortByPrevFree, + accounting::GCAllocator<AllocationHeader*> > FreeBlocks; + byte* const begin_; byte* const end_; + + // There is not footer for any allocations at the end of the space, so we keep track of how much + // free space there is at the end manually. UniquePtr<MemMap> mem_map_; Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::vector<Chunk> chunks_ GUARDED_BY(lock_); - FreeChunks free_chunks_ GUARDED_BY(lock_); + size_t free_end_ GUARDED_BY(lock_); + FreeBlocks free_blocks_ GUARDED_BY(lock_); }; } // namespace space diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index bc6e8185b1..231cabc45b 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -154,8 +154,10 @@ class AllocSpace { // Number of objects allocated since the space was created. virtual uint64_t GetTotalObjectsAllocated() const = 0; - // Allocate num_bytes without allowing growth. - virtual mirror::Object* Alloc(Thread* self, size_t num_bytes) = 0; + // Allocate num_bytes without allowing growth. If the allocation + // succeeds, the output parameter bytes_allocated will be set to the + // actually allocated bytes which is >= num_bytes. + virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0; // Return the storage space required by obj. virtual size_t AllocationSize(const mirror::Object* obj) = 0; diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc index 3003140e5d..455168c90f 100644 --- a/runtime/gc/space/space_test.cc +++ b/runtime/gc/space/space_test.cc @@ -15,6 +15,7 @@ */ #include "dlmalloc_space.h" +#include "large_object_space.h" #include "common_test.h" #include "globals.h" @@ -37,6 +38,11 @@ class SpaceTest : public CommonTest { } }; +static size_t test_rand(size_t* seed) { + *seed = *seed * 1103515245 + 12345; + return *seed; +} + TEST_F(SpaceTest, Init) { { // Init < max == growth @@ -80,6 +86,7 @@ TEST_F(SpaceTest, Init) { // allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that // the GC works with the ZygoteSpace. TEST_F(SpaceTest, ZygoteSpace) { + size_t dummy = 0; DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL)); ASSERT_TRUE(space != NULL); @@ -88,32 +95,35 @@ TEST_F(SpaceTest, ZygoteSpace) { Thread* self = Thread::Current(); // Succeeds, fits without adjusting the footprint limit. - mirror::Object* ptr1 = space->Alloc(self, 1 * MB); + mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy); EXPECT_TRUE(ptr1 != NULL); // Fails, requires a higher footprint limit. 
- mirror::Object* ptr2 = space->Alloc(self, 8 * MB); + mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy); EXPECT_TRUE(ptr2 == NULL); // Succeeds, adjusts the footprint. - mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB); + size_t ptr3_bytes_allocated; + mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated); EXPECT_TRUE(ptr3 != NULL); + EXPECT_LE(8U * MB, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = space->Alloc(self, 8 * MB); + mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy); EXPECT_TRUE(ptr4 == NULL); // Also fails, requires a higher allowed footprint. - mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB); + mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy); EXPECT_TRUE(ptr5 == NULL); // Release some memory. size_t free3 = space->AllocationSize(ptr3); + EXPECT_EQ(free3, ptr3_bytes_allocated); EXPECT_EQ(free3, space->Free(self, ptr3)); EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - void* ptr6 = space->AllocWithGrowth(self, 9 * MB); + void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy); EXPECT_TRUE(ptr6 != NULL); // Final clean up. @@ -122,22 +132,22 @@ TEST_F(SpaceTest, ZygoteSpace) { EXPECT_LE(1U * MB, free1); // Make sure that the zygote space isn't directly at the start of the space. - space->Alloc(self, 1U * MB); + space->Alloc(self, 1U * MB, &dummy); space = space->CreateZygoteSpace("alloc space"); // Make space findable to the heap, will also delete space when runtime is cleaned up AddContinuousSpace(space); // Succeeds, fits without adjusting the footprint limit. - ptr1 = space->Alloc(self, 1 * MB); + ptr1 = space->Alloc(self, 1 * MB, &dummy); EXPECT_TRUE(ptr1 != NULL); // Fails, requires a higher footprint limit. - ptr2 = space->Alloc(self, 8 * MB); + ptr2 = space->Alloc(self, 8 * MB, &dummy); EXPECT_TRUE(ptr2 == NULL); // Succeeds, adjusts the footprint. - ptr3 = space->AllocWithGrowth(self, 2 * MB); + ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy); EXPECT_TRUE(ptr3 != NULL); space->Free(self, ptr3); @@ -148,6 +158,7 @@ TEST_F(SpaceTest, ZygoteSpace) { } TEST_F(SpaceTest, AllocAndFree) { + size_t dummy = 0; DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL)); ASSERT_TRUE(space != NULL); Thread* self = Thread::Current(); @@ -156,32 +167,35 @@ TEST_F(SpaceTest, AllocAndFree) { AddContinuousSpace(space); // Succeeds, fits without adjusting the footprint limit. - mirror::Object* ptr1 = space->Alloc(self, 1 * MB); + mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy); EXPECT_TRUE(ptr1 != NULL); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = space->Alloc(self, 8 * MB); + mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy); EXPECT_TRUE(ptr2 == NULL); // Succeeds, adjusts the footprint. - mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB); + size_t ptr3_bytes_allocated; + mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated); EXPECT_TRUE(ptr3 != NULL); + EXPECT_LE(8U * MB, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = space->Alloc(self, 8 * MB); + mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy); EXPECT_TRUE(ptr4 == NULL); // Also fails, requires a higher allowed footprint. - mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB); + mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy); EXPECT_TRUE(ptr5 == NULL); // Release some memory. 
size_t free3 = space->AllocationSize(ptr3); + EXPECT_EQ(free3, ptr3_bytes_allocated); space->Free(self, ptr3); EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - void* ptr6 = space->AllocWithGrowth(self, 9 * MB); + void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy); EXPECT_TRUE(ptr6 != NULL); // Final clean up. @@ -190,6 +204,67 @@ TEST_F(SpaceTest, AllocAndFree) { EXPECT_LE(1U * MB, free1); } +TEST_F(SpaceTest, LargeObjectTest) { + size_t rand_seed = 0; + for (size_t i = 0; i < 2; ++i) { + LargeObjectSpace* los = NULL; + if (i == 0) { + los = space::LargeObjectMapSpace::Create("large object space"); + } else { + los = space::FreeListSpace::Create("large object space", NULL, 128 * MB); + } + + static const size_t num_allocations = 64; + static const size_t max_allocation_size = 0x100000; + std::vector<std::pair<mirror::Object*, size_t> > requests; + + for (size_t phase = 0; phase < 2; ++phase) { + while (requests.size() < num_allocations) { + size_t request_size = test_rand(&rand_seed) % max_allocation_size; + size_t allocation_size = 0; + mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size); + ASSERT_TRUE(obj != NULL); + ASSERT_EQ(allocation_size, los->AllocationSize(obj)); + ASSERT_GE(allocation_size, request_size); + // Fill in our magic value. + byte magic = (request_size & 0xFF) | 1; + memset(obj, magic, request_size); + requests.push_back(std::make_pair(obj, request_size)); + } + + // "Randomly" shuffle the requests. + for (size_t k = 0; k < 10; ++k) { + for (size_t j = 0; j < requests.size(); ++j) { + std::swap(requests[j], requests[test_rand(&rand_seed) % requests.size()]); + } + } + + // Free 1 / 2 the allocations the first phase, and all the second phase. + size_t limit = !phase ? requests.size() / 2 : 0; + while (requests.size() > limit) { + mirror::Object* obj = requests.back().first; + size_t request_size = requests.back().second; + requests.pop_back(); + byte magic = (request_size & 0xFF) | 1; + for (size_t k = 0; k < request_size; ++k) { + ASSERT_EQ(reinterpret_cast<const byte*>(obj)[k], magic); + } + ASSERT_GE(los->Free(Thread::Current(), obj), request_size); + } + } + + size_t bytes_allocated = 0; + // Checks that the coalescing works. + mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated); + EXPECT_TRUE(obj != NULL); + los->Free(Thread::Current(), obj); + + EXPECT_EQ(0U, los->GetBytesAllocated()); + EXPECT_EQ(0U, los->GetObjectsAllocated()); + delete los; + } +} + TEST_F(SpaceTest, AllocAndFreeList) { DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL)); ASSERT_TRUE(space != NULL); @@ -201,7 +276,9 @@ TEST_F(SpaceTest, AllocAndFreeList) { // Succeeds, fits without adjusting the max allowed footprint. mirror::Object* lots_of_objects[1024]; for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - lots_of_objects[i] = space->Alloc(self, 16); + size_t allocation_size = 0; + lots_of_objects[i] = space->Alloc(self, 16, &allocation_size); + EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i])); EXPECT_TRUE(lots_of_objects[i] != NULL); } @@ -213,7 +290,9 @@ TEST_F(SpaceTest, AllocAndFreeList) { // Succeeds, fits by adjusting the max allowed footprint. 
for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - lots_of_objects[i] = space->AllocWithGrowth(self, 1024); + size_t allocation_size = 0; + lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size); + EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i])); EXPECT_TRUE(lots_of_objects[i] != NULL); } @@ -224,11 +303,6 @@ TEST_F(SpaceTest, AllocAndFreeList) { } } -static size_t test_rand() { - // TODO: replace this with something random yet deterministic - return rand(); -} - void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(DlMallocSpace* space, intptr_t object_size, int round, size_t growth_limit) { if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) || @@ -261,6 +335,7 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(DlMallocSpace* space, intptr size_t last_object = 0; // last object for which allocation succeeded size_t amount_allocated = 0; // amount of space allocated Thread* self = Thread::Current(); + size_t rand_seed = 123456789; for (size_t i = 0; i < max_objects; i++) { size_t alloc_fails = 0; // number of failed allocations size_t max_fails = 30; // number of times we fail allocation before giving up @@ -269,22 +344,24 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(DlMallocSpace* space, intptr if (object_size > 0) { alloc_size = object_size; } else { - alloc_size = test_rand() % static_cast<size_t>(-object_size); + alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size); if (alloc_size < 8) { alloc_size = 8; } } mirror::Object* object; + size_t bytes_allocated = 0; if (round <= 1) { - object = space->Alloc(self, alloc_size); + object = space->Alloc(self, alloc_size, &bytes_allocated); } else { - object = space->AllocWithGrowth(self, alloc_size); + object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated); } footprint = mspace_footprint(mspace); EXPECT_GE(space->Size(), footprint); // invariant if (object != NULL) { // allocation succeeded lots_of_objects.get()[i] = object; size_t allocation_size = space->AllocationSize(object); + EXPECT_EQ(bytes_allocated, allocation_size); if (object_size > 0) { EXPECT_GE(allocation_size, static_cast<size_t>(object_size)); } else { @@ -354,10 +431,11 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(DlMallocSpace* space, intptr // All memory was released, try a large allocation to check freed memory is being coalesced mirror::Object* large_object; size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4); + size_t bytes_allocated = 0; if (round <= 1) { - large_object = space->Alloc(self, three_quarters_space); + large_object = space->Alloc(self, three_quarters_space, &bytes_allocated); } else { - large_object = space->AllocWithGrowth(self, three_quarters_space); + large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated); } EXPECT_TRUE(large_object != NULL); diff --git a/runtime/image_test.cc b/runtime/image_test.cc index 22bed2e3d2..334f7abfd3 100644 --- a/runtime/image_test.cc +++ b/runtime/image_test.cc @@ -46,6 +46,11 @@ TEST_F(ImageTest, WriteRead) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); base::TimingLogger timings("ImageTest::WriteRead", false, false); timings.StartSplit("CompileAll"); +#if defined(ART_USE_PORTABLE_COMPILER) + // TODO: we disable this for portable so the test executes in a reasonable amount of time. + // We shouldn't need to do this. 
+ runtime_->SetCompilerFilter(Runtime::kInterpretOnly); +#endif compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings); ScopedObjectAccess soa(Thread::Current()); diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 6681d5615d..d1de6e6f5a 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -2853,10 +2853,11 @@ bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_lo VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]"; - bool result = true; + bool was_successful = false; void* sym = dlsym(handle, "JNI_OnLoad"); if (sym == NULL) { VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]"; + was_successful = true; } else { // Call JNI_OnLoad. We have to override the current class // loader, which will always be "null" since the stuff at the @@ -2876,7 +2877,9 @@ bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_lo self->SetClassLoaderOverride(old_class_loader); - if (IsBadJniVersion(version)) { + if (version == JNI_ERR) { + StringAppendF(&detail, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str()); + } else if (IsBadJniVersion(version)) { StringAppendF(&detail, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d", path.c_str(), version); // It's unwise to call dlclose() here, but we can mark it @@ -2885,14 +2888,15 @@ bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_lo // be some partially-initialized stuff accessible through // newly-registered native method calls. We could try to // unregister them, but that doesn't seem worthwhile. - result = false; + } else { + was_successful = true; } - VLOG(jni) << "[Returned " << (result ? "successfully" : "failure") + VLOG(jni) << "[Returned " << (was_successful ? "successfully" : "failure") << " from JNI_OnLoad in \"" << path << "\"]"; } - library->SetResult(result); - return result; + library->SetResult(was_successful); + return was_successful; } void* JavaVMExt::FindCodeForNativeMethod(AbstractMethod* m) { diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc index 97126cba4c..c64caa82a6 100644 --- a/runtime/mirror/string.cc +++ b/runtime/mirror/string.cc @@ -101,7 +101,8 @@ int32_t String::GetLength() const { uint16_t String::CharAt(int32_t index) const { // TODO: do we need this? Equals is the only caller, and could // bounds check itself. - if (index < 0 || index >= count_) { + DCHECK_GE(count_, 0); // ensures the unsigned comparison is safe. + if (UNLIKELY(static_cast<uint32_t>(index) >= static_cast<uint32_t>(count_))) { Thread* self = Thread::Current(); ThrowLocation throw_location = self->GetCurrentLocationForThrow(); self->ThrowNewExceptionF(throw_location, "Ljava/lang/StringIndexOutOfBoundsException;", diff --git a/runtime/stack.cc b/runtime/stack.cc index b07a24ef2a..e1a752adb9 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -267,7 +267,11 @@ void StackVisitor::SanityCheckFrame() const { // Frame sanity. size_t frame_size = method->GetFrameSizeInBytes(); CHECK_NE(frame_size, 0u); - CHECK_LT(frame_size, 1024u); + // A rough guess at an upper size we expect to see for a frame. The 256 is + // a dex register limit. The 16 incorporates callee save spills and + // outgoing argument set up. 
+ const size_t kMaxExpectedFrameSize = 256 * sizeof(word) + 16; + CHECK_LE(frame_size, kMaxExpectedFrameSize); size_t return_pc_offset = method->GetReturnPcOffsetInBytes(); CHECK_LT(return_pc_offset, frame_size); } diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index d10dc733c3..eb6e3c3453 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -2819,8 +2819,10 @@ const RegType& MethodVerifier::ResolveClassAndCheckAccess(uint32_t class_idx) { dex_cache_->SetResolvedType(class_idx, result.GetClass()); } // Check if access is allowed. Unresolved types use xxxWithAccessCheck to - // check at runtime if access is allowed and so pass here. - if (!result.IsUnresolvedTypes() && !referrer.IsUnresolvedTypes() && !referrer.CanAccess(result)) { + // check at runtime if access is allowed and so pass here. If result is + // primitive, skip the access check. + if (result.IsNonZeroReferenceTypes() && !result.IsUnresolvedTypes() && + !referrer.IsUnresolvedTypes() && !referrer.CanAccess(result)) { Fail(VERIFY_ERROR_ACCESS_CLASS) << "illegal class access: '" << referrer << "' -> '" << result << "'"; } @@ -3297,6 +3299,43 @@ void MethodVerifier::VerifyAGet(const Instruction* inst, } } +void MethodVerifier::VerifyPrimitivePut(const RegType& target_type, const RegType& insn_type, + const uint32_t vregA) { + // Primitive assignability rules are weaker than regular assignability rules. + bool instruction_compatible; + bool value_compatible; + const RegType& value_type = work_line_->GetRegisterType(vregA); + if (target_type.IsIntegralTypes()) { + instruction_compatible = target_type.Equals(insn_type); + value_compatible = value_type.IsIntegralTypes(); + } else if (target_type.IsFloat()) { + instruction_compatible = insn_type.IsInteger(); // no put-float, so expect put-int + value_compatible = value_type.IsFloatTypes(); + } else if (target_type.IsLong()) { + instruction_compatible = insn_type.IsLong(); + value_compatible = value_type.IsLongTypes(); + } else if (target_type.IsDouble()) { + instruction_compatible = insn_type.IsLong(); // no put-double, so expect put-long + value_compatible = value_type.IsDoubleTypes(); + } else { + instruction_compatible = false; // reference with primitive store + value_compatible = false; // unused + } + if (!instruction_compatible) { + // This is a global failure rather than a class change failure as the instructions and + // the descriptors for the type should have been consistent within the same file at + // compile time. 
+ Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "put insn has type '" << insn_type + << "' but expected type '" << target_type << "'"; + return; + } + if (!value_compatible) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected value in v" << vregA + << " of type " << value_type << " but expected " << target_type << " for put"; + return; + } +} + void MethodVerifier::VerifyAPut(const Instruction* inst, const RegType& insn_type, bool is_primitive) { const RegType& index_type = work_line_->GetRegisterType(inst->VRegC_23x()); @@ -3310,25 +3349,20 @@ void MethodVerifier::VerifyAPut(const Instruction* inst, } else if (!array_type.IsArrayTypes()) { Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput"; } else { - /* verify the class */ const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_); - if (!component_type.IsReferenceTypes() && !is_primitive) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "primitive array type " << array_type - << " source for aput-object"; - } else if (component_type.IsNonZeroReferenceTypes() && is_primitive) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "reference array type " << array_type - << " source for category 1 aput"; - } else if (is_primitive && !insn_type.Equals(component_type) && - !((insn_type.IsInteger() && component_type.IsFloat()) || - (insn_type.IsLong() && component_type.IsDouble()))) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "array type " << array_type - << " incompatible with aput of type " << insn_type; + const uint32_t vregA = inst->VRegA_23x(); + if (is_primitive) { + VerifyPrimitivePut(component_type, insn_type, vregA); } else { - // The instruction agrees with the type of array, confirm the value to be stored does too - // Note: we use the instruction type (rather than the component type) for aput-object as - // incompatible classes will be caught at runtime as an array store exception - work_line_->VerifyRegisterType(inst->VRegA_23x(), - is_primitive ? component_type : insn_type); + if (!component_type.IsReferenceTypes()) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "primitive array type " << array_type + << " source for aput-object"; + } else { + // The instruction agrees with the type of array, confirm the value to be stored does too + // Note: we use the instruction type (rather than the component type) for aput-object as + // incompatible classes will be caught at runtime as an array store exception + work_line_->VerifyRegisterType(vregA, insn_type); + } } } } @@ -3458,8 +3492,8 @@ void MethodVerifier::VerifyISGet(const Instruction* inst, const RegType& insn_ty const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c(); if (is_primitive) { if (field_type.Equals(insn_type) || - (field_type.IsFloat() && insn_type.IsIntegralTypes()) || - (field_type.IsDouble() && insn_type.IsLongTypes())) { + (field_type.IsFloat() && insn_type.IsInteger()) || + (field_type.IsDouble() && insn_type.IsLong())) { // expected that read is of the correct primitive type or that int reads are reading // floats or long reads are reading doubles } else { @@ -3518,43 +3552,7 @@ void MethodVerifier::VerifyISPut(const Instruction* inst, const RegType& insn_ty } const uint32_t vregA = (is_static) ? 
inst->VRegA_21c() : inst->VRegA_22c(); if (is_primitive) { - // Primitive field assignability rules are weaker than regular assignability rules - bool instruction_compatible; - bool value_compatible; - const RegType& value_type = work_line_->GetRegisterType(vregA); - if (field_type.IsIntegralTypes()) { - instruction_compatible = insn_type.IsIntegralTypes(); - value_compatible = value_type.IsIntegralTypes(); - } else if (field_type.IsFloat()) { - instruction_compatible = insn_type.IsInteger(); // no [is]put-float, so expect [is]put-int - value_compatible = value_type.IsFloatTypes(); - } else if (field_type.IsLong()) { - instruction_compatible = insn_type.IsLong(); - value_compatible = value_type.IsLongTypes(); - } else if (field_type.IsDouble()) { - instruction_compatible = insn_type.IsLong(); // no [is]put-double, so expect [is]put-long - value_compatible = value_type.IsDoubleTypes(); - } else { - instruction_compatible = false; // reference field with primitive store - value_compatible = false; // unused - } - if (!instruction_compatible) { - // This is a global failure rather than a class change failure as the instructions and - // the descriptors for the type should have been consistent within the same file at - // compile time - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << PrettyField(field) - << " to be of type '" << insn_type - << "' but found type '" << field_type - << "' in put"; - return; - } - if (!value_compatible) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected value in v" << vregA - << " of type " << value_type - << " but expected " << field_type - << " for store to " << PrettyField(field) << " in put"; - return; - } + VerifyPrimitivePut(field_type, insn_type, vregA); } else { if (!insn_type.IsAssignableFrom(field_type)) { Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field) @@ -3756,6 +3754,10 @@ bool MethodVerifier::UpdateRegisters(uint32_t next_insn, const RegisterLine* mer if (!insn_flags_[next_insn].IsReturn()) { target_line->CopyFromLine(merge_line); } else { + // Verify that the monitor stack is empty on return. + if (!merge_line->VerifyMonitorStackEmpty()) { + return false; + } // For returns we only care about the operand to the return, all other registers are dead. // Initialize them as conflicts so they don't add to GC and deoptimization information. 
const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn); @@ -4061,20 +4063,19 @@ void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t> void MethodVerifier::SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* cast_set) { DCHECK(Runtime::Current()->IsCompiler()); - MutexLock mu(Thread::Current(), *safecast_map_lock_); + WriterMutexLock mu(Thread::Current(), *safecast_map_lock_); SafeCastMap::iterator it = safecast_map_->find(ref); if (it != safecast_map_->end()) { delete it->second; safecast_map_->erase(it); } - safecast_map_->Put(ref, cast_set); DCHECK(safecast_map_->find(ref) != safecast_map_->end()); } bool MethodVerifier::IsSafeCast(MethodReference ref, uint32_t pc) { DCHECK(Runtime::Current()->IsCompiler()); - MutexLock mu(Thread::Current(), *safecast_map_lock_); + ReaderMutexLock mu(Thread::Current(), *safecast_map_lock_); SafeCastMap::const_iterator it = safecast_map_->find(ref); if (it == safecast_map_->end()) { return false; @@ -4186,7 +4187,7 @@ bool MethodVerifier::IsCandidateForCompilation(const DexFile::CodeItem* code_ite ReaderWriterMutex* MethodVerifier::dex_gc_maps_lock_ = NULL; MethodVerifier::DexGcMapTable* MethodVerifier::dex_gc_maps_ = NULL; -Mutex* MethodVerifier::safecast_map_lock_ = NULL; +ReaderWriterMutex* MethodVerifier::safecast_map_lock_ = NULL; MethodVerifier::SafeCastMap* MethodVerifier::safecast_map_ = NULL; ReaderWriterMutex* MethodVerifier::devirt_maps_lock_ = NULL; @@ -4204,9 +4205,9 @@ void MethodVerifier::Init() { dex_gc_maps_ = new MethodVerifier::DexGcMapTable; } - safecast_map_lock_ = new Mutex("verifier Cast Elision lock"); + safecast_map_lock_ = new ReaderWriterMutex("verifier Cast Elision lock"); { - MutexLock mu(self, *safecast_map_lock_); + WriterMutexLock mu(self, *safecast_map_lock_); safecast_map_ = new MethodVerifier::SafeCastMap(); } @@ -4239,7 +4240,7 @@ void MethodVerifier::Shutdown() { dex_gc_maps_lock_ = NULL; { - MutexLock mu(self, *safecast_map_lock_); + WriterMutexLock mu(self, *safecast_map_lock_); STLDeleteValues(safecast_map_); delete safecast_map_; safecast_map_ = NULL; diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index 3f98a00adc..e01f2c08c1 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -230,6 +230,10 @@ class MethodVerifier { uint32_t access_flags, bool can_load_classes, bool allow_soft_failures) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + ~MethodVerifier() { + STLDeleteElements(&failure_messages_); + } + // Run verification on the method. Returns true if verification completes and false if the input // has an irrecoverable corruption. bool Verify() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -476,6 +480,10 @@ class MethodVerifier { void VerifyNewArray(const Instruction* inst, bool is_filled, bool is_range) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Helper to perform verification on puts of primitive type. + void VerifyPrimitivePut(const RegType& target_type, const RegType& insn_type, + const uint32_t vregA) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Perform verification of an aget instruction. The destination register's type will be set to // be that of component type of the array unless the array type is unknown, in which case a // bottom type inferred from the type of instruction is used. 
is_primitive is false for an @@ -640,7 +648,7 @@ class MethodVerifier { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* mscs); LOCKS_EXCLUDED(safecast_map_lock_); - static Mutex* safecast_map_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + static ReaderWriterMutex* safecast_map_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; static SafeCastMap* safecast_map_ GUARDED_BY(safecast_map_lock_); // Devirtualization map. diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc index 7965c0641e..24a626b277 100644 --- a/runtime/verifier/register_line.cc +++ b/runtime/verifier/register_line.cc @@ -38,7 +38,7 @@ bool RegisterLine::CheckConstructorReturn() const { bool RegisterLine::SetRegisterType(uint32_t vdst, const RegType& new_type) { DCHECK_LT(vdst, num_regs_); if (new_type.IsLowHalf() || new_type.IsHighHalf()) { - verifier_->Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Expected category1 register type not '" + verifier_->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Expected category1 register type not '" << new_type << "'"; return false; } else if (new_type.IsConflict()) { // should only be set during a merge @@ -448,7 +448,7 @@ void RegisterLine::PopMonitor(uint32_t reg_idx) { } } -bool RegisterLine::VerifyMonitorStackEmpty() { +bool RegisterLine::VerifyMonitorStackEmpty() const { if (MonitorStackDepth() != 0) { verifier_->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected empty monitor stack"; return false; diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h index f3808776f2..f19dccac17 100644 --- a/runtime/verifier/register_line.h +++ b/runtime/verifier/register_line.h @@ -268,7 +268,7 @@ class RegisterLine { // We expect no monitors to be held at certain points, such a method returns. Verify the stack // is empty, failing and returning false if not. - bool VerifyMonitorStackEmpty(); + bool VerifyMonitorStackEmpty() const; bool MergeRegisters(const RegisterLine* incoming_line) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); |