Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/codegen_test.cc          331
-rw-r--r--  compiler/optimizing/codegen_test_utils.h     355
-rw-r--r--  compiler/optimizing/nodes.cc                  16
-rw-r--r--  compiler/optimizing/nodes.h                    4
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc     6
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h     3
-rw-r--r--  compiler/optimizing/scheduler.cc             610
-rw-r--r--  compiler/optimizing/scheduler.h              487
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc       196
-rw-r--r--  compiler/optimizing/scheduler_arm64.h        117
-rw-r--r--  compiler/optimizing/scheduler_test.cc        238
11 files changed, 2023 insertions, 340 deletions
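
The bulk of this change adds an instruction scheduling pass to the optimizing compiler and wires it into the arm64 back end. For orientation before the diff itself, here is a minimal sketch of how the new pass is instantiated and run, using only names that appear in the hunks below; the argument values are illustrative and the surrounding pipeline setup is elided:

    // HInstructionScheduling is an HOptimization, created per graph like the
    // other passes registered in optimizing_compiler.cc below.
    HInstructionScheduling* scheduling =
        new (arena) HInstructionScheduling(graph, instruction_set);
    // The two-argument Run() overload (see scheduler.cc below) picks between
    // the critical-path selector and the random selector used for testing,
    // and can restrict scheduling to loop blocks.
    scheduling->Run(/* only_optimize_loop_blocks */ true,
                    /* schedule_randomly */ false);
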
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 763d6da6f5..f8bbf68c1c 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,30 +17,15 @@  #include <functional>  #include <memory> -#include "arch/instruction_set.h" -#include "arch/arm/instruction_set_features_arm.h" -#include "arch/arm/registers_arm.h" -#include "arch/arm64/instruction_set_features_arm64.h" -#include "arch/mips/instruction_set_features_mips.h" -#include "arch/mips/registers_mips.h" -#include "arch/mips64/instruction_set_features_mips64.h" -#include "arch/mips64/registers_mips64.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "arch/x86/registers_x86.h" -#include "arch/x86_64/instruction_set_features_x86_64.h"  #include "base/macros.h"  #include "builder.h" -#include "code_simulator_container.h" -#include "common_compiler_test.h" +#include "codegen_test_utils.h"  #include "dex_file.h"  #include "dex_instruction.h"  #include "driver/compiler_options.h" -#include "graph_checker.h"  #include "nodes.h"  #include "optimizing_unit_test.h" -#include "prepare_for_register_allocation.h"  #include "register_allocator_linear_scan.h" -#include "ssa_liveness_analysis.h"  #include "utils.h"  #include "utils/arm/assembler_arm_vixl.h"  #include "utils/arm/managed_register_arm.h" @@ -48,324 +33,10 @@  #include "utils/mips64/managed_register_mips64.h"  #include "utils/x86/managed_register_x86.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "code_generator_arm.h" -#include "code_generator_arm_vixl.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "code_generator_arm64.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_x86 -#include "code_generator_x86.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_x86_64 -#include "code_generator_x86_64.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_mips -#include "code_generator_mips.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "code_generator_mips64.h" -#endif -  #include "gtest/gtest.h"  namespace art { -typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); - -class CodegenTargetConfig { - public: -  CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen) -      : isa_(isa), create_codegen_(create_codegen) { -  } -  InstructionSet GetInstructionSet() const { return isa_; } -  CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) { -    return create_codegen_(graph, compiler_options); -  } - - private: -  CodegenTargetConfig() {} -  InstructionSet isa_; -  CreateCodegenFn create_codegen_; -}; - -#ifdef ART_ENABLE_CODEGEN_arm -// Provide our own codegen, that ensures the C calling conventions -// are preserved. Currently, ART and C do not match as R4 is caller-save -// in ART, and callee-save in C. Alternatively, we could use or write -// the stub that saves and restores all registers, but it is easier -// to just overwrite the code generator. 
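
For context on why the calling convention matters here: the harness's Run() helper (further down in this diff) makes the generated buffer executable and then calls it directly as a plain C function pointer, so nothing sits between the C caller and the ART-generated code to save and restore registers. A minimal sketch of that calling pattern, with `code_buffer` as a hypothetical stand-in for the allocator's memory:

    typedef int32_t (*TestFn)();
    // Direct call into generated code: if the generated code clobbers a
    // register the C caller expects preserved (such as R4 on ARM), the
    // caller's state is silently corrupted.
    TestFn f = reinterpret_cast<TestFn>(code_buffer);
    int32_t result = f();

The TestCodeGeneratorARM override below works around exactly this mismatch by blocking R4 and allocating R6/R7 instead.
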
-class TestCodeGeneratorARM : public arm::CodeGeneratorARM { - public: -  TestCodeGeneratorARM(HGraph* graph, -                       const ArmInstructionSetFeatures& isa_features, -                       const CompilerOptions& compiler_options) -      : arm::CodeGeneratorARM(graph, isa_features, compiler_options) { -    AddAllocatedRegister(Location::RegisterLocation(arm::R6)); -    AddAllocatedRegister(Location::RegisterLocation(arm::R7)); -  } - -  void SetupBlockedRegisters() const OVERRIDE { -    arm::CodeGeneratorARM::SetupBlockedRegisters(); -    blocked_core_registers_[arm::R4] = true; -    blocked_core_registers_[arm::R6] = false; -    blocked_core_registers_[arm::R7] = false; -  } -}; - -// A way to test the VIXL32-based code generator on ARM. This will replace -// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one. -class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { - public: -  TestCodeGeneratorARMVIXL(HGraph* graph, -                           const ArmInstructionSetFeatures& isa_features, -                           const CompilerOptions& compiler_options) -      : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { -    AddAllocatedRegister(Location::RegisterLocation(arm::R6)); -    AddAllocatedRegister(Location::RegisterLocation(arm::R7)); -  } - -  void SetupBlockedRegisters() const OVERRIDE { -    arm::CodeGeneratorARMVIXL::SetupBlockedRegisters(); -    blocked_core_registers_[arm::R4] = true; -    blocked_core_registers_[arm::R6] = false; -    blocked_core_registers_[arm::R7] = false; -  } -}; -#endif - -#ifdef ART_ENABLE_CODEGEN_x86 -class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { - public: -  TestCodeGeneratorX86(HGraph* graph, -                       const X86InstructionSetFeatures& isa_features, -                       const CompilerOptions& compiler_options) -      : x86::CodeGeneratorX86(graph, isa_features, compiler_options) { -    // Save edi, we need it for getting enough registers for long multiplication. -    AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); -  } - -  void SetupBlockedRegisters() const OVERRIDE { -    x86::CodeGeneratorX86::SetupBlockedRegisters(); -    // ebx is a callee-save register in C, but caller-save for ART. -    blocked_core_registers_[x86::EBX] = true; - -    // Make edi available. -    blocked_core_registers_[x86::EDI] = false; -  } -}; -#endif - -class InternalCodeAllocator : public CodeAllocator { - public: -  InternalCodeAllocator() : size_(0) { } - -  virtual uint8_t* Allocate(size_t size) { -    size_ = size; -    memory_.reset(new uint8_t[size]); -    return memory_.get(); -  } - -  size_t GetSize() const { return size_; } -  uint8_t* GetMemory() const { return memory_.get(); } - - private: -  size_t size_; -  std::unique_ptr<uint8_t[]> memory_; - -  DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); -}; - -static bool CanExecuteOnHardware(InstructionSet target_isa) { -  return (target_isa == kRuntimeISA) -      // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). 
-      || (kRuntimeISA == kArm && target_isa == kThumb2); -} - -static bool CanExecute(InstructionSet target_isa) { -  CodeSimulatorContainer simulator(target_isa); -  return CanExecuteOnHardware(target_isa) || simulator.CanSimulate(); -} - -template <typename Expected> -static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)()); - -template <> -bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) { -  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); -  return simulator->GetCReturnBool(); -} - -template <> -int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) { -  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); -  return simulator->GetCReturnInt32(); -} - -template <> -int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) { -  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); -  return simulator->GetCReturnInt64(); -} - -template <typename Expected> -static void VerifyGeneratedCode(InstructionSet target_isa, -                                Expected (*f)(), -                                bool has_result, -                                Expected expected) { -  ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable."; - -  // Verify on simulator. -  CodeSimulatorContainer simulator(target_isa); -  if (simulator.CanSimulate()) { -    Expected result = SimulatorExecute<Expected>(simulator.Get(), f); -    if (has_result) { -      ASSERT_EQ(expected, result); -    } -  } - -  // Verify on hardware. -  if (CanExecuteOnHardware(target_isa)) { -    Expected result = f(); -    if (has_result) { -      ASSERT_EQ(expected, result); -    } -  } -} - -template <typename Expected> -static void Run(const InternalCodeAllocator& allocator, -                const CodeGenerator& codegen, -                bool has_result, -                Expected expected) { -  InstructionSet target_isa = codegen.GetInstructionSet(); - -  typedef Expected (*fptr)(); -  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); -  fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); -  if (target_isa == kThumb2) { -    // For thumb we need the bottom bit set. 
-    f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); -  } -  VerifyGeneratedCode(target_isa, f, has_result, expected); -} - -static void ValidateGraph(HGraph* graph) { -  GraphChecker graph_checker(graph); -  graph_checker.Run(); -  if (!graph_checker.IsValid()) { -    for (const auto& error : graph_checker.GetErrors()) { -      std::cout << error << std::endl; -    } -  } -  ASSERT_TRUE(graph_checker.IsValid()); -} - -template <typename Expected> -static void RunCodeNoCheck(CodeGenerator* codegen, -                           HGraph* graph, -                           const std::function<void(HGraph*)>& hook_before_codegen, -                           bool has_result, -                           Expected expected) { -  SsaLivenessAnalysis liveness(graph, codegen); -  PrepareForRegisterAllocation(graph).Run(); -  liveness.Analyze(); -  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); -  hook_before_codegen(graph); -  InternalCodeAllocator allocator; -  codegen->Compile(&allocator); -  Run(allocator, *codegen, has_result, expected); -} - -template <typename Expected> -static void RunCode(CodeGenerator* codegen, -                    HGraph* graph, -                    std::function<void(HGraph*)> hook_before_codegen, -                    bool has_result, -                    Expected expected) { -  ValidateGraph(graph); -  RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected); -} - -template <typename Expected> -static void RunCode(CodegenTargetConfig target_config, -                    HGraph* graph, -                    std::function<void(HGraph*)> hook_before_codegen, -                    bool has_result, -                    Expected expected) { -  CompilerOptions compiler_options; -  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); -  RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); -} - -#ifdef ART_ENABLE_CODEGEN_arm -CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const ArmInstructionSetFeatures> features_arm( -      ArmInstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) TestCodeGeneratorARM(graph, -                                                      *features_arm.get(), -                                                      compiler_options); -} - -CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const ArmInstructionSetFeatures> features_arm( -      ArmInstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) -      TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_arm64 -CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( -      Arm64InstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph, -                                                           *features_arm64.get(), -                                                           compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_x86 -CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const X86InstructionSetFeatures> features_x86( -      X86InstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) 
TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_x86_64 -CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( -     X86_64InstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) -      x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_mips -CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const MipsInstructionSetFeatures> features_mips( -      MipsInstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) -      mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); -} -#endif - -#ifdef ART_ENABLE_CODEGEN_mips64 -CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { -  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( -      Mips64InstructionSetFeatures::FromCppDefines()); -  return new (graph->GetArena()) -      mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); -} -#endif -  // Return all combinations of ISA and code generator that are executable on  // hardware, or on simulator, and that we'd like to test.  static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h new file mode 100644 index 0000000000..cd954043f5 --- /dev/null +++ b/compiler/optimizing/codegen_test_utils.h @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ +#define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ + +#include "arch/arm/instruction_set_features_arm.h" +#include "arch/arm/registers_arm.h" +#include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/instruction_set.h" +#include "arch/mips/instruction_set_features_mips.h" +#include "arch/mips/registers_mips.h" +#include "arch/mips64/instruction_set_features_mips64.h" +#include "arch/mips64/registers_mips64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86/registers_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" +#include "code_simulator_container.h" +#include "common_compiler_test.h" +#include "graph_checker.h" +#include "prepare_for_register_allocation.h" +#include "ssa_liveness_analysis.h" + +#ifdef ART_ENABLE_CODEGEN_arm +#include "code_generator_arm.h" +#include "code_generator_arm_vixl.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "code_generator_arm64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +#include "code_generator_x86.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "code_generator_x86_64.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +#include "code_generator_mips.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +#include "code_generator_mips64.h" +#endif + +namespace art { + +typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); + +class CodegenTargetConfig { + public: +  CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen) +      : isa_(isa), create_codegen_(create_codegen) { +  } +  InstructionSet GetInstructionSet() const { return isa_; } +  CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) { +    return create_codegen_(graph, compiler_options); +  } + + private: +  CodegenTargetConfig() {} +  InstructionSet isa_; +  CreateCodegenFn create_codegen_; +}; + +#ifdef ART_ENABLE_CODEGEN_arm +// Provide our own codegen, that ensures the C calling conventions +// are preserved. Currently, ART and C do not match as R4 is caller-save +// in ART, and callee-save in C. Alternatively, we could use or write +// the stub that saves and restores all registers, but it is easier +// to just overwrite the code generator. +class TestCodeGeneratorARM : public arm::CodeGeneratorARM { + public: +  TestCodeGeneratorARM(HGraph* graph, +                       const ArmInstructionSetFeatures& isa_features, +                       const CompilerOptions& compiler_options) +      : arm::CodeGeneratorARM(graph, isa_features, compiler_options) { +    AddAllocatedRegister(Location::RegisterLocation(arm::R6)); +    AddAllocatedRegister(Location::RegisterLocation(arm::R7)); +  } + +  void SetupBlockedRegisters() const OVERRIDE { +    arm::CodeGeneratorARM::SetupBlockedRegisters(); +    blocked_core_registers_[arm::R4] = true; +    blocked_core_registers_[arm::R6] = false; +    blocked_core_registers_[arm::R7] = false; +  } +}; + +// A way to test the VIXL32-based code generator on ARM. This will replace +// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one. 
+class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { + public: +  TestCodeGeneratorARMVIXL(HGraph* graph, +                           const ArmInstructionSetFeatures& isa_features, +                           const CompilerOptions& compiler_options) +      : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { +    AddAllocatedRegister(Location::RegisterLocation(arm::R6)); +    AddAllocatedRegister(Location::RegisterLocation(arm::R7)); +  } + +  void SetupBlockedRegisters() const OVERRIDE { +    arm::CodeGeneratorARMVIXL::SetupBlockedRegisters(); +    blocked_core_registers_[arm::R4] = true; +    blocked_core_registers_[arm::R6] = false; +    blocked_core_registers_[arm::R7] = false; +  } +}; +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { + public: +  TestCodeGeneratorX86(HGraph* graph, +                       const X86InstructionSetFeatures& isa_features, +                       const CompilerOptions& compiler_options) +      : x86::CodeGeneratorX86(graph, isa_features, compiler_options) { +    // Save edi, we need it for getting enough registers for long multiplication. +    AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); +  } + +  void SetupBlockedRegisters() const OVERRIDE { +    x86::CodeGeneratorX86::SetupBlockedRegisters(); +    // ebx is a callee-save register in C, but caller-save for ART. +    blocked_core_registers_[x86::EBX] = true; + +    // Make edi available. +    blocked_core_registers_[x86::EDI] = false; +  } +}; +#endif + +class InternalCodeAllocator : public CodeAllocator { + public: +  InternalCodeAllocator() : size_(0) { } + +  virtual uint8_t* Allocate(size_t size) { +    size_ = size; +    memory_.reset(new uint8_t[size]); +    return memory_.get(); +  } + +  size_t GetSize() const { return size_; } +  uint8_t* GetMemory() const { return memory_.get(); } + + private: +  size_t size_; +  std::unique_ptr<uint8_t[]> memory_; + +  DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); +}; + +static bool CanExecuteOnHardware(InstructionSet target_isa) { +  return (target_isa == kRuntimeISA) +      // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). +      || (kRuntimeISA == kArm && target_isa == kThumb2); +} + +static bool CanExecute(InstructionSet target_isa) { +  CodeSimulatorContainer simulator(target_isa); +  return CanExecuteOnHardware(target_isa) || simulator.CanSimulate(); +} + +template <typename Expected> +inline static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)()); + +template <> +inline bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) { +  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); +  return simulator->GetCReturnBool(); +} + +template <> +inline int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) { +  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); +  return simulator->GetCReturnInt32(); +} + +template <> +inline int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) { +  simulator->RunFrom(reinterpret_cast<intptr_t>(f)); +  return simulator->GetCReturnInt64(); +} + +template <typename Expected> +static void VerifyGeneratedCode(InstructionSet target_isa, +                                Expected (*f)(), +                                bool has_result, +                                Expected expected) { +  ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable."; + +  // Verify on simulator. 
+  CodeSimulatorContainer simulator(target_isa); +  if (simulator.CanSimulate()) { +    Expected result = SimulatorExecute<Expected>(simulator.Get(), f); +    if (has_result) { +      ASSERT_EQ(expected, result); +    } +  } + +  // Verify on hardware. +  if (CanExecuteOnHardware(target_isa)) { +    Expected result = f(); +    if (has_result) { +      ASSERT_EQ(expected, result); +    } +  } +} + +template <typename Expected> +static void Run(const InternalCodeAllocator& allocator, +                const CodeGenerator& codegen, +                bool has_result, +                Expected expected) { +  InstructionSet target_isa = codegen.GetInstructionSet(); + +  typedef Expected (*fptr)(); +  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); +  fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); +  if (target_isa == kThumb2) { +    // For thumb we need the bottom bit set. +    f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); +  } +  VerifyGeneratedCode(target_isa, f, has_result, expected); +} + +static void ValidateGraph(HGraph* graph) { +  GraphChecker graph_checker(graph); +  graph_checker.Run(); +  if (!graph_checker.IsValid()) { +    for (const auto& error : graph_checker.GetErrors()) { +      std::cout << error << std::endl; +    } +  } +  ASSERT_TRUE(graph_checker.IsValid()); +} + +template <typename Expected> +static void RunCodeNoCheck(CodeGenerator* codegen, +                           HGraph* graph, +                           const std::function<void(HGraph*)>& hook_before_codegen, +                           bool has_result, +                           Expected expected) { +  SsaLivenessAnalysis liveness(graph, codegen); +  PrepareForRegisterAllocation(graph).Run(); +  liveness.Analyze(); +  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters(); +  hook_before_codegen(graph); +  InternalCodeAllocator allocator; +  codegen->Compile(&allocator); +  Run(allocator, *codegen, has_result, expected); +} + +template <typename Expected> +static void RunCode(CodeGenerator* codegen, +                    HGraph* graph, +                    std::function<void(HGraph*)> hook_before_codegen, +                    bool has_result, +                    Expected expected) { +  ValidateGraph(graph); +  RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected); +} + +template <typename Expected> +static void RunCode(CodegenTargetConfig target_config, +                    HGraph* graph, +                    std::function<void(HGraph*)> hook_before_codegen, +                    bool has_result, +                    Expected expected) { +  CompilerOptions compiler_options; +  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); +  RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); +} + +#ifdef ART_ENABLE_CODEGEN_arm +CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const ArmInstructionSetFeatures> features_arm( +      ArmInstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) TestCodeGeneratorARM(graph, +                                                      *features_arm.get(), +                                                      compiler_options); +} + +CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const ArmInstructionSetFeatures> features_arm( +      
ArmInstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) +      TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 +CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( +      Arm64InstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph, +                                                           *features_arm64.get(), +                                                           compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_x86 +CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const X86InstructionSetFeatures> features_x86( +      X86InstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_x86_64 +CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( +     X86_64InstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) +      x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_mips +CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const MipsInstructionSetFeatures> features_mips( +      MipsInstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) +      mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); +} +#endif + +#ifdef ART_ENABLE_CODEGEN_mips64 +CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { +  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( +      Mips64InstructionSetFeatures::FromCppDefines()); +  return new (graph->GetArena()) +      mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); +} +#endif + +}  // namespace art + +#endif  // ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d15145e673..76900f23a9 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1354,13 +1354,15 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind&    return os;  } -void HInstruction::MoveBefore(HInstruction* cursor) { -  DCHECK(!IsPhi()); -  DCHECK(!IsControlFlow()); -  DCHECK(CanBeMoved() || -         // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization. -         IsShouldDeoptimizeFlag()); -  DCHECK(!cursor->IsPhi()); +void HInstruction::MoveBefore(HInstruction* cursor, bool do_checks) { +  if (do_checks) { +    DCHECK(!IsPhi()); +    DCHECK(!IsControlFlow()); +    DCHECK(CanBeMoved() || +           // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization. 
+           IsShouldDeoptimizeFlag()); +    DCHECK(!cursor->IsPhi()); +  }    next_->previous_ = previous_;    if (previous_ != nullptr) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f0ea9e20e6..acf14aa726 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2065,8 +2065,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {      other->ReplaceInput(this, use_index);    } -  // Move `this` instruction before `cursor`. -  void MoveBefore(HInstruction* cursor); +  // Move `this` instruction before `cursor` +  void MoveBefore(HInstruction* cursor, bool do_checks = true);    // Move `this` before its first user and out of any loops. If there is no    // out-of-loop user that dominates all other users, move the instruction diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 297500b12f..1ab671022b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -90,6 +90,7 @@  #include "reference_type_propagation.h"  #include "register_allocator_linear_scan.h"  #include "select_generator.h" +#include "scheduler.h"  #include "sharpening.h"  #include "side_effects_analysis.h"  #include "ssa_builder.h" @@ -658,10 +659,13 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,            new (arena) arm64::InstructionSimplifierArm64(graph, stats);        SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);        GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); +      HInstructionScheduling* scheduling = +          new (arena) HInstructionScheduling(graph, instruction_set);        HOptimization* arm64_optimizations[] = {          simplifier,          side_effects, -        gvn +        gvn, +        scheduling,        };        RunOptimizations(arm64_optimizations, arraysize(arm64_optimizations), pass_observer);        break; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 58d90176cd..bf963b8996 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -64,6 +64,9 @@ LiveInterval* BuildInterval(const size_t ranges[][2],  void RemoveSuspendChecks(HGraph* graph) {    for (HBasicBlock* block : graph->GetBlocks()) {      if (block != nullptr) { +      if (block->GetLoopInformation() != nullptr) { +        block->GetLoopInformation()->SetSuspendCheck(nullptr); +      }        for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {          HInstruction* current = it.Current();          if (current->IsSuspendCheck()) { diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc new file mode 100644 index 0000000000..d65d20cf43 --- /dev/null +++ b/compiler/optimizing/scheduler.cc @@ -0,0 +1,610 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+
+#include "prepare_for_register_allocation.h"
+#include "scheduler.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "scheduler_arm64.h"
+#endif
+
+namespace art {
+
+void SchedulingGraph::AddDependency(SchedulingNode* node,
+                                    SchedulingNode* dependency,
+                                    bool is_data_dependency) {
+  if (node == nullptr || dependency == nullptr) {
+    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
+    // another block), so we do not need to add a dependency edge to the graph.
+    return;
+  }
+
+  if (is_data_dependency) {
+    if (!HasImmediateDataDependency(node, dependency)) {
+      node->AddDataPredecessor(dependency);
+    }
+  } else if (!HasImmediateOtherDependency(node, dependency)) {
+    node->AddOtherPredecessor(dependency);
+  }
+}
+
+static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
+  // Read after write.
+  if (node.MayDependOn(other)) {
+    return true;
+  }
+
+  // Write after read.
+  if (other.MayDependOn(node)) {
+    return true;
+  }
+
+  // Memory write after write.
+  if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
+    return true;
+  }
+
+  return false;
+}
+
+
+// Check whether `node` depends on `other`, taking into account `SideEffect`
+// information and `CanThrow` information.
+static bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) {
+  if (MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
+    return true;
+  }
+
+  if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
+    return true;
+  }
+
+  if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
+    return true;
+  }
+
+  if (other->CanThrow() && node->CanThrow()) {
+    return true;
+  }
+
+  // Check side-effect dependency between ArrayGet and BoundsCheck.
+  if (node->IsArrayGet() && other->IsBoundsCheck() && node->InputAt(1) == other) {
+    return true;
+  }
+
+  return false;
+}
+
+void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
+  SchedulingNode* instruction_node = GetNode(instruction);
+
+  // Define-use dependencies.
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    AddDataDependency(GetNode(use.GetUser()), instruction_node);
+  }
+
+  // Scheduling barrier dependencies.
+  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
+  if (contains_scheduling_barrier_) {
+    // A barrier depends on instructions after it. And instructions before the
+    // barrier depend on it.
+    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
+      SchedulingNode* other_node = GetNode(other);
+      bool other_is_barrier = other_node->IsSchedulingBarrier();
+      if (is_scheduling_barrier || other_is_barrier) {
+        AddOtherDependency(other_node, instruction_node);
+      }
+      if (other_is_barrier) {
+        // This other scheduling barrier guarantees ordering of instructions after
+        // it, so avoid creating additional useless dependencies in the graph.
+        // For example if we have +        //     instr_1 +        //     barrier_2 +        //     instr_3 +        //     barrier_4 +        //     instr_5 +        // we only create the following non-data dependencies +        //     1 -> 2 +        //     2 -> 3 +        //     2 -> 4 +        //     3 -> 4 +        //     4 -> 5 +        // and do not create +        //     1 -> 4 +        //     2 -> 5 +        // Note that in this example we could also avoid creating the dependency +        // `2 -> 4`.  But if we remove `instr_3` that dependency is required to +        // order the barriers. So we generate it to avoid a special case. +        break; +      } +    } +  } + +  // Side effect dependencies. +  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) { +    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) { +      SchedulingNode* other_node = GetNode(other); +      if (other_node->IsSchedulingBarrier()) { +        // We have reached a scheduling barrier so we can stop further +        // processing. +        DCHECK(HasImmediateOtherDependency(other_node, instruction_node)); +        break; +      } +      if (HasSideEffectDependency(other, instruction)) { +        AddOtherDependency(other_node, instruction_node); +      } +    } +  } + +  // Environment dependencies. +  // We do not need to process those if the instruction is a scheduling barrier, +  // since the barrier already has non-data dependencies on all following +  // instructions. +  if (!is_scheduling_barrier) { +    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { +      // Note that here we could stop processing if the environment holder is +      // across a scheduling barrier. But checking this would likely require +      // more work than simply iterating through environment uses. +      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node); +    } +  } +} + +bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node, +                                                 const SchedulingNode* other) const { +  return ContainsElement(node->GetDataPredecessors(), other); +} + +bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction, +                                                 const HInstruction* other_instruction) const { +  const SchedulingNode* node = GetNode(instruction); +  const SchedulingNode* other = GetNode(other_instruction); +  if (node == nullptr || other == nullptr) { +    // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their +    // corresponding SchedulingNode in the graph, and tell whether there is a dependency. +    // Otherwise there is no dependency from SchedulingGraph's perspective, for example, +    // instruction and other_instruction are in different basic blocks. 
+    return false; +  } +  return HasImmediateDataDependency(node, other); +} + +bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node, +                                                  const SchedulingNode* other) const { +  return ContainsElement(node->GetOtherPredecessors(), other); +} + +bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction, +                                                  const HInstruction* other_instruction) const { +  const SchedulingNode* node = GetNode(instruction); +  const SchedulingNode* other = GetNode(other_instruction); +  if (node == nullptr || other == nullptr) { +    // Both instructions must be in current basic block, i.e. the SchedulingGraph can see their +    // corresponding SchedulingNode in the graph, and tell whether there is a dependency. +    // Otherwise there is no dependency from SchedulingGraph's perspective, for example, +    // instruction and other_instruction are in different basic blocks. +    return false; +  } +  return HasImmediateOtherDependency(node, other); +} + +static const std::string InstructionTypeId(const HInstruction* instruction) { +  std::string id; +  Primitive::Type type = instruction->GetType(); +  if (type == Primitive::kPrimNot) { +    id.append("l"); +  } else { +    id.append(Primitive::Descriptor(instruction->GetType())); +  } +  // Use lower-case to be closer to the `HGraphVisualizer` output. +  id[0] = std::tolower(id[0]); +  id.append(std::to_string(instruction->GetId())); +  return id; +} + +// Ideally we would reuse the graph visualizer code, but it is not available +// from here and it is not worth moving all that code only for our use. +static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) { +  const HInstruction* instruction = node->GetInstruction(); +  // Use the instruction typed id as the node identifier. +  std::string instruction_id = InstructionTypeId(instruction); +  output << instruction_id << "[shape=record, label=\"" +      << instruction_id << ' ' << instruction->DebugName() << " ["; +  // List the instruction's inputs in its description. When visualizing the +  // graph this helps differentiating data inputs from other dependencies. +  const char* seperator = ""; +  for (const HInstruction* input : instruction->GetInputs()) { +    output << seperator << InstructionTypeId(input); +    seperator = ","; +  } +  output << "]"; +  // Other properties of the node. +  output << "\\ninternal_latency: " << node->GetInternalLatency(); +  output << "\\ncritical_path: " << node->GetCriticalPath(); +  if (node->IsSchedulingBarrier()) { +    output << "\\n(barrier)"; +  } +  output << "\"];\n"; +  // We want program order to go from top to bottom in the graph output, so we +  // reverse the edges and specify `dir=back`. 
+  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) { +    const HInstruction* predecessor_instruction = predecessor->GetInstruction(); +    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n " +        << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n"; +  } +  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) { +    const HInstruction* predecessor_instruction = predecessor->GetInstruction(); +    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n " +        << "[dir=back,color=blue]\n"; +  } +} + +void SchedulingGraph::DumpAsDotGraph(const std::string& description, +                                     const ArenaVector<SchedulingNode*>& initial_candidates) { +  // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that +  // we should move this dotty graph dump feature to visualizer, and have a compiler option for it. +  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app); +  // Description of this graph, as a comment. +  output << "// " << description << "\n"; +  // Start the dot graph. Use an increasing index for easier differentiation. +  output << "digraph G {\n"; +  for (const auto& entry : nodes_map_) { +    DumpAsDotNode(output, entry.second); +  } +  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths. +  for (auto node : initial_candidates) { +    const HInstruction* instruction = node->GetInstruction(); +    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n " +      << "[label=\"" << node->GetLatency() << "\",dir=back]\n"; +  } +  // End of the dot graph. +  output << "}\n"; +  output.close(); +} + +SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( +    ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const { +  // Schedule condition inputs that can be materialized immediately before their use. +  // In following example, after we've scheduled HSelect, we want LessThan to be scheduled +  // immediately, because it is a materialized condition, and will be emitted right before HSelect +  // in codegen phase. +  // +  // i20 HLessThan [...]                  HLessThan    HAdd      HAdd +  // i21 HAdd [...]                ===>      |          |         | +  // i22 HAdd [...]                          +----------+---------+ +  // i23 HSelect [i21, i22, i20]                     HSelect + +  if (prev_select_ == nullptr) { +    return nullptr; +  } + +  const HInstruction* instruction = prev_select_->GetInstruction(); +  const HCondition* condition = nullptr; +  DCHECK(instruction != nullptr); + +  if (instruction->IsIf()) { +    condition = instruction->AsIf()->InputAt(0)->AsCondition(); +  } else if (instruction->IsSelect()) { +    condition = instruction->AsSelect()->GetCondition()->AsCondition(); +  } + +  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr; + +  if ((condition_node != nullptr) && +      condition->HasOnlyOneNonEnvironmentUse() && +      ContainsElement(*nodes, condition_node)) { +    DCHECK(!condition_node->HasUnscheduledSuccessors()); +    // Remove the condition from the list of candidates and schedule it. 
+    RemoveElement(*nodes, condition_node); +    return condition_node; +  } + +  return nullptr; +} + +SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode( +    ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) { +  DCHECK(!nodes->empty()); +  SchedulingNode* select_node = nullptr; + +  // Optimize for materialized condition and its emit before use scenario. +  select_node = SelectMaterializedCondition(nodes, graph); + +  if (select_node == nullptr) { +    // Get highest priority node based on critical path information. +    select_node = (*nodes)[0]; +    size_t select = 0; +    for (size_t i = 1, e = nodes->size(); i < e; i++) { +      SchedulingNode* check = (*nodes)[i]; +      SchedulingNode* candidate = (*nodes)[select]; +      select_node = GetHigherPrioritySchedulingNode(candidate, check); +      if (select_node == check) { +        select = i; +      } +    } +    DeleteNodeAtIndex(nodes, select); +  } + +  prev_select_ = select_node; +  return select_node; +} + +SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode( +    SchedulingNode* candidate, SchedulingNode* check) const { +  uint32_t candidate_path = candidate->GetCriticalPath(); +  uint32_t check_path = check->GetCriticalPath(); +  // First look at the critical_path. +  if (check_path != candidate_path) { +    return check_path < candidate_path ? check : candidate; +  } +  // If both critical paths are equal, schedule instructions with a higher latency +  // first in program order. +  return check->GetLatency() < candidate->GetLatency() ? check : candidate; +} + +void HScheduler::Schedule(HGraph* graph) { +  for (HBasicBlock* block : graph->GetReversePostOrder()) { +    if (IsSchedulable(block)) { +      Schedule(block); +    } +  } +} + +void HScheduler::Schedule(HBasicBlock* block) { +  ArenaVector<SchedulingNode*> scheduling_nodes(arena_->Adapter(kArenaAllocScheduler)); + +  // Build the scheduling graph. +  scheduling_graph_.Clear(); +  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { +    HInstruction* instruction = it.Current(); +    SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction)); +    CalculateLatency(node); +    scheduling_nodes.push_back(node); +  } + +  if (scheduling_graph_.Size() <= 1) { +    scheduling_graph_.Clear(); +    return; +  } + +  cursor_ = block->GetLastInstruction(); + +  // Find the initial candidates for scheduling. +  candidates_.clear(); +  for (SchedulingNode* node : scheduling_nodes) { +    if (!node->HasUnscheduledSuccessors()) { +      node->MaybeUpdateCriticalPath(node->GetLatency()); +      candidates_.push_back(node); +    } +  } + +  ArenaVector<SchedulingNode*> initial_candidates(arena_->Adapter(kArenaAllocScheduler)); +  if (kDumpDotSchedulingGraphs) { +    // Remember the list of initial candidates for debug output purposes. +    initial_candidates.assign(candidates_.begin(), candidates_.end()); +  } + +  // Schedule all nodes. +  while (!candidates_.empty()) { +    Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_)); +  } + +  if (kDumpDotSchedulingGraphs) { +    // Dump the graph in `dot` format. 
+    HGraph* graph = block->GetGraph(); +    std::stringstream description; +    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx()) +        << " B" << block->GetBlockId(); +    scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates); +  } +} + +void HScheduler::Schedule(SchedulingNode* scheduling_node) { +  // Check whether any of the node's predecessors will be valid candidates after +  // this node is scheduled. +  uint32_t path_to_node = scheduling_node->GetCriticalPath(); +  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) { +    predecessor->MaybeUpdateCriticalPath( +        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency()); +    predecessor->DecrementNumberOfUnscheduledSuccessors(); +    if (!predecessor->HasUnscheduledSuccessors()) { +      candidates_.push_back(predecessor); +    } +  } +  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) { +    // Do not update the critical path. +    // The 'other' (so 'non-data') dependencies (usually) do not represent a +    // 'material' dependency of nodes on others. They exist for program +    // correctness. So we do not use them to compute the critical path. +    predecessor->DecrementNumberOfUnscheduledSuccessors(); +    if (!predecessor->HasUnscheduledSuccessors()) { +      candidates_.push_back(predecessor); +    } +  } + +  Schedule(scheduling_node->GetInstruction()); +} + +// Move an instruction after cursor instruction inside one basic block. +static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) { +  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock()); +  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction()); +  DCHECK(!instruction->IsControlFlow()); +  DCHECK(!cursor->IsControlFlow()); +  instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false); +} + +void HScheduler::Schedule(HInstruction* instruction) { +  if (instruction == cursor_) { +    cursor_ = cursor_->GetPrevious(); +  } else { +    MoveAfterInBlock(instruction, cursor_); +  } +} + +bool HScheduler::IsSchedulable(const HInstruction* instruction) const { +  // We want to avoid exhaustively listing all instructions, so we first check +  // for instruction categories that we know are safe. +  if (instruction->IsControlFlow() || +      instruction->IsConstant()) { +    return true; +  } +  // Currently all unary and binary operations are safe to schedule, so avoid +  // checking for each of them individually. +  // Since nothing prevents a new scheduling-unsafe HInstruction to subclass +  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have +  // the exhaustive lists here. 
+  if (instruction->IsUnaryOperation()) {
+    DCHECK(instruction->IsBooleanNot() ||
+           instruction->IsNot() ||
+           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
+    return true;
+  }
+  if (instruction->IsBinaryOperation()) {
+    DCHECK(instruction->IsAdd() ||
+           instruction->IsAnd() ||
+           instruction->IsCompare() ||
+           instruction->IsCondition() ||
+           instruction->IsDiv() ||
+           instruction->IsMul() ||
+           instruction->IsOr() ||
+           instruction->IsRem() ||
+           instruction->IsRor() ||
+           instruction->IsShl() ||
+           instruction->IsShr() ||
+           instruction->IsSub() ||
+           instruction->IsUShr() ||
+           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
+    return true;
+  }
+  // The scheduler should not see any of these.
+  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
+  // List of instructions explicitly excluded:
+  //    HClearException
+  //    HClinitCheck
+  //    HDeoptimize
+  //    HLoadClass
+  //    HLoadException
+  //    HMemoryBarrier
+  //    HMonitorOperation
+  //    HNativeDebugInfo
+  //    HThrow
+  //    HTryBoundary
+  // TODO: Some of the instructions above may be safe to schedule (maybe as
+  // scheduling barriers).
+  return instruction->IsArrayGet() ||
+      instruction->IsArraySet() ||
+      instruction->IsArrayLength() ||
+      instruction->IsBoundType() ||
+      instruction->IsBoundsCheck() ||
+      instruction->IsCheckCast() ||
+      instruction->IsClassTableGet() ||
+      instruction->IsCurrentMethod() ||
+      instruction->IsDivZeroCheck() ||
+      instruction->IsInstanceFieldGet() ||
+      instruction->IsInstanceFieldSet() ||
+      instruction->IsInstanceOf() ||
+      instruction->IsInvokeInterface() ||
+      instruction->IsInvokeStaticOrDirect() ||
+      instruction->IsInvokeUnresolved() ||
+      instruction->IsInvokeVirtual() ||
+      instruction->IsLoadString() ||
+      instruction->IsNewArray() ||
+      instruction->IsNewInstance() ||
+      instruction->IsNullCheck() ||
+      instruction->IsPackedSwitch() ||
+      instruction->IsParameterValue() ||
+      instruction->IsPhi() ||
+      instruction->IsReturn() ||
+      instruction->IsReturnVoid() ||
+      instruction->IsSelect() ||
+      instruction->IsStaticFieldGet() ||
+      instruction->IsStaticFieldSet() ||
+      instruction->IsSuspendCheck() ||
+      instruction->IsTypeConversion() ||
+      instruction->IsUnresolvedInstanceFieldGet() ||
+      instruction->IsUnresolvedInstanceFieldSet() ||
+      instruction->IsUnresolvedStaticFieldGet() ||
+      instruction->IsUnresolvedStaticFieldSet();
+}
+
+bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
+  // We may only be interested in loop blocks.
+  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
+    return false;
+  }
+  if (block->GetTryCatchInformation() != nullptr) {
+    // Do not schedule blocks that are part of try-catch, because the scheduler
+    // cannot see whether the catch block has assumptions about the instruction
+    // order in the try block. In the following example, if we enable the
+    // scheduler for the try block, MultiplyAccumulate may be scheduled before
+    // DivZeroCheck, which can result in an incorrect value in the catch block.
+    //   try {
+    //     a = a/b;    // DivZeroCheck
+    //                 // Div
+    //     c = c*d+e;  // MultiplyAccumulate
+    //   } catch {System.out.print(c); }
+    return false;
+  }
+  // Check whether all instructions in this block are schedulable.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    if (!IsSchedulable(it.Current())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
+  return instr->IsControlFlow() ||
+      // Don't break calling convention.
+      instr->IsParameterValue() ||
+      // Code generation of goto relies on SuspendCheck's position.
+      instr->IsSuspendCheck();
+}
+
+void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
+                                 bool schedule_randomly) {
+  // Avoid compilation error when compiling for unsupported instruction set.
+  UNUSED(only_optimize_loop_blocks);
+  UNUSED(schedule_randomly);
+  switch (instruction_set_) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+    case kArm64: {
+      // Phase-local allocator that allocates scheduler internal data structures like
+      // scheduling nodes, internal nodes map, dependencies, etc.
+      ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+
+      CriticalPathSchedulingNodeSelector critical_path_selector;
+      RandomSchedulingNodeSelector random_selector;
+      SchedulingNodeSelector* selector = schedule_randomly
+          ? static_cast<SchedulingNodeSelector*>(&random_selector)
+          : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+
+      arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
+      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+      scheduler.Schedule(graph_);
+      break;
+    }
+#endif
+    default:
+      break;
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
new file mode 100644
index 0000000000..ab0dad4300
--- /dev/null
+++ b/compiler/optimizing/scheduler.h
@@ -0,0 +1,487 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+
+#include <fstream>
+
+#include "base/time_utils.h"
+#include "driver/compiler_driver.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// General description of instruction scheduling.
+//
+// This pass tries to improve the quality of the generated code by reordering
+// instructions in the graph to avoid execution delays caused by execution
+// dependencies.
+// Currently, scheduling is performed at the block level, so no `HInstruction`
+// ever leaves its block in this pass.
+//
+// The scheduling process iterates through blocks in the graph. For blocks that
+// we can and want to schedule:
+// 1) Build a dependency graph for instructions.
+//    It includes data dependencies (inputs/uses), but also environment
+//    dependencies and side-effect dependencies.
+// 2) Schedule the dependency graph.
+//    This is a topological sort of the dependency graph, using heuristics to
+//    decide what node to schedule first when there are multiple candidates.
+//
+// A few factors impacting the quality of the scheduling are:
+// - The heuristics used to decide what node to schedule in the topological sort
+//   when there are multiple valid candidates. There is a wide range of
+//   complexity possible here, going from a simple model only considering
+//   latencies, to a super detailed CPU pipeline model.
+// - Fewer dependencies in the dependency graph give more freedom for the
+//   scheduling heuristics. For example de-aliasing can allow possibilities for
+//   reordering of memory accesses.
+// - The level of abstraction of the IR. It is easier to evaluate scheduling for
+//   IRs that translate to a single assembly instruction than for IRs
+//   that generate multiple assembly instructions or generate different code
+//   depending on properties of the IR.
+// - Scheduling is performed before register allocation, so it is not aware of
+//   the impact of moving instructions on register allocation.
+//
+//
+// The scheduling code uses the terms predecessors, successors, and dependencies.
+// This can be confusing at times, so here are some clarifications.
+// These terms are used from the point of view of the program dependency graph. So
+// the inputs of an instruction are part of its dependencies, and hence part of its
+// predecessors. So the uses of an instruction are (part of) its successors.
+// (Side-effect dependencies can yield predecessors or successors that are not
+// inputs or uses.)
+//
+// Here is a trivial example. For the Java code:
+//
+//    int a = 1 + 2;
+//
+// we would have the instructions
+//
+//    i1 HIntConstant 1
+//    i2 HIntConstant 2
+//    i3 HAdd [i1,i2]
+//
+// `i1` and `i2` are predecessors of `i3`.
+// `i3` is a successor of `i1` and a successor of `i2`.
+// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
+// and `n3` (respectively for instructions `i1`, `i2`, and `i3`).
+// Conceptually the program dependency graph for this would contain two edges
+//
+//    n1 -> n3
+//    n2 -> n3
+//
+// Since we schedule backwards (starting from the last instruction in each basic
+// block), the implementation of nodes keeps a list of pointers to their
+// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`.
+//
+// Node dependencies are also referred to from the program dependency graph
+// point of view: we say that node `B` immediately depends on `A` if there is an
+// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
+// `B`, `B` is a successor of `A`. In the example above `n3` depends on `n1` and
+// `n2`.
+// Since nodes in the scheduling graph keep a list of their predecessors, node
+// `B` will have a pointer to its predecessor `A`.
+// As we schedule backwards, `B` will be selected for scheduling before `A` is.
+//
+// So the scheduling for the example above could happen as follows
+//
+//    |---------------------------+------------------------|
+//    | candidates for scheduling | instructions scheduled |
+//    | --------------------------+------------------------|
+//
+// The only node without successors is `n3`, so it is the only initial
+// candidate.
+//
+//    | n3                        | (none)                 |
+//
+// We schedule `n3` as the last (and only) instruction. All its predecessors
+// that do not have any unscheduled successors become candidates. That is, `n1`
+// and `n2` are now candidates.
+//
+//    | n1, n2                    | n3                     |
+//
+// One of the candidates is selected. In practice this is where scheduling
+// heuristics kick in, to decide which of the candidates should be selected.
+// In this example, let it be `n1`. It is scheduled before the previously scheduled
+// nodes (in program order). There are no other nodes to add to the list of
+// candidates.
+//
+//    | n2                        | n1                     |
+//    |                           | n3                     |
+//
+// The only candidate available for scheduling is `n2`. Schedule it before
+// (in program order) the previously scheduled nodes.
+//
+//    | (none)                    | n2                     |
+//    |                           | n1                     |
+//    |                           | n3                     |
+//    |---------------------------+------------------------|
+//
+// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
+// In this trivial example, it does not matter which of `i1` and `i2` is
+// scheduled first since they are constants. However, the same process would
+// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).
+
+// Set to true to have instruction scheduling dump scheduling graphs to the file
+// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
+static constexpr bool kDumpDotSchedulingGraphs = false;
+
+// Typically used as a default instruction latency.
+static constexpr uint32_t kGenericInstructionLatency = 1;
+
+class HScheduler;
+
+/**
+ * A node representing an `HInstruction` in the `SchedulingGraph`.
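+ *
+ * Each node caches the latency information computed by the latency visitor,
+ * and keeps the predecessor lists and unscheduled-successor count that drive
+ * the backward list scheduling described above.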
+ */
+class SchedulingNode : public ArenaObject<kArenaAllocScheduler> {
+ public:
+  SchedulingNode(HInstruction* instr, ArenaAllocator* arena, bool is_scheduling_barrier)
+      : latency_(0),
+        internal_latency_(0),
+        critical_path_(0),
+        instruction_(instr),
+        is_scheduling_barrier_(is_scheduling_barrier),
+        data_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+        other_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+        num_unscheduled_successors_(0) {
+    data_predecessors_.reserve(kPreallocatedPredecessors);
+  }
+
+  void AddDataPredecessor(SchedulingNode* predecessor) {
+    data_predecessors_.push_back(predecessor);
+    predecessor->num_unscheduled_successors_++;
+  }
+
+  void AddOtherPredecessor(SchedulingNode* predecessor) {
+    other_predecessors_.push_back(predecessor);
+    predecessor->num_unscheduled_successors_++;
+  }
+
+  void DecrementNumberOfUnscheduledSuccessors() {
+    num_unscheduled_successors_--;
+  }
+
+  void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
+    critical_path_ = std::max(critical_path_, other_critical_path);
+  }
+
+  bool HasUnscheduledSuccessors() const {
+    return num_unscheduled_successors_ != 0;
+  }
+
+  HInstruction* GetInstruction() const { return instruction_; }
+  uint32_t GetLatency() const { return latency_; }
+  void SetLatency(uint32_t latency) { latency_ = latency; }
+  uint32_t GetInternalLatency() const { return internal_latency_; }
+  void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
+  uint32_t GetCriticalPath() const { return critical_path_; }
+  bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }
+  const ArenaVector<SchedulingNode*>& GetDataPredecessors() const { return data_predecessors_; }
+  const ArenaVector<SchedulingNode*>& GetOtherPredecessors() const { return other_predecessors_; }
+
+ private:
+  // The latency of this node. It represents the latency from the moment the
+  // last instruction for this node has executed to the moment the result
+  // produced by this node is available to users.
+  uint32_t latency_;
+  // This represents the time spent *within* the generated code for this node.
+  // It should be zero for nodes that only generate a single instruction.
+  uint32_t internal_latency_;
+
+  // The critical path from this instruction to the end of scheduling. It is
+  // used by the scheduling heuristics to measure the priority of this instruction.
+  // It is defined as
+  //     critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
+  // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
+  // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
+  uint32_t critical_path_;
+
+  // The instruction that this node represents.
+  HInstruction* const instruction_;
+
+  // If a node is a scheduling barrier, other nodes cannot be scheduled before it.
+  const bool is_scheduling_barrier_;
+
+  // The lists of predecessors. They cannot be scheduled before this node. Once
+  // this node is scheduled, we check whether any of its predecessors has become a
+  // valid candidate for scheduling.
+  // Predecessors in `data_predecessors_` are data dependencies. Those in
+  // `other_predecessors_` contain side-effect dependencies, environment
+  // dependencies, and scheduling barrier dependencies.
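+  // For example, in the dependency graph built in scheduler_test.cc, an
+  // HArraySet that follows an HArrayGet on the same array records the ArrayGet
+  // node in its `other_predecessors_` list (a read/write memory dependency).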
+  ArenaVector<SchedulingNode*> data_predecessors_;
+  ArenaVector<SchedulingNode*> other_predecessors_;
+
+  // The number of unscheduled successors for this node. This number is
+  // decremented as successors are scheduled. When it reaches zero this node
+  // becomes a valid candidate to schedule.
+  uint32_t num_unscheduled_successors_;
+
+  static constexpr size_t kPreallocatedPredecessors = 4;
+};
+
+/*
+ * Directed acyclic graph for scheduling.
+ */
+class SchedulingGraph : public ValueObject {
+ public:
+  SchedulingGraph(const HScheduler* scheduler, ArenaAllocator* arena)
+      : scheduler_(scheduler),
+        arena_(arena),
+        contains_scheduling_barrier_(false),
+        nodes_map_(arena_->Adapter(kArenaAllocScheduler)) {}
+
+  SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
+    SchedulingNode* node = new (arena_) SchedulingNode(instr, arena_, is_scheduling_barrier);
+    nodes_map_.Insert(std::make_pair(instr, node));
+    contains_scheduling_barrier_ |= is_scheduling_barrier;
+    AddDependencies(instr, is_scheduling_barrier);
+    return node;
+  }
+
+  void Clear() {
+    nodes_map_.Clear();
+    contains_scheduling_barrier_ = false;
+  }
+
+  SchedulingNode* GetNode(const HInstruction* instr) const {
+    auto it = nodes_map_.Find(instr);
+    if (it == nodes_map_.end()) {
+      return nullptr;
+    } else {
+      return it->second;
+    }
+  }
+
+  bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+  bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+  bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const;
+  bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+  bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
+
+  size_t Size() const {
+    return nodes_map_.Size();
+  }
+
+  // Dump the scheduling graph, in dot file format, appending it to the file
+  // `scheduling_graphs.dot`.
+  void DumpAsDotGraph(const std::string& description,
+                      const ArenaVector<SchedulingNode*>& initial_candidates);
+
+ protected:
+  void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
+  void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
+    AddDependency(node, dependency, /*is_data_dependency*/true);
+  }
+  void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
+    AddDependency(node, dependency, /*is_data_dependency*/false);
+  }
+
+  // Add dependencies for the given `HInstruction`: inputs, environments, and side-effects.
+  void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false);
+
+  const HScheduler* const scheduler_;
+
+  ArenaAllocator* const arena_;
+
+  bool contains_scheduling_barrier_;
+
+  ArenaHashMap<const HInstruction*, SchedulingNode*> nodes_map_;
+};
+
+/*
+ * The visitors derived from this base class are used by schedulers to evaluate
+ * the latencies of `HInstruction`s.
+ */
+class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
+ public:
+  // This class and its sub-classes will never be used to drive a visit of an
+  // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
+  // to pass a valid graph to `HGraphDelegateVisitor()`.
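+  // Concrete visitors report their results by setting `last_visited_latency_`
+  // and, for IR that expands to several instructions, `last_visited_internal_latency_`
+  // (see `CalculateLatency()` below, which resets the latter before each visit).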
+  SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+
+  void VisitInstruction(HInstruction* instruction) OVERRIDE {
+    LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
+        "Architecture-specific scheduling latency visitors must handle all instructions"
+        " (potentially by overriding the generic `VisitInstruction()`).";
+    UNREACHABLE();
+  }
+
+  void Visit(HInstruction* instruction) {
+    instruction->Accept(this);
+  }
+
+  void CalculateLatency(SchedulingNode* node) {
+    // By default nodes have no internal latency.
+    last_visited_internal_latency_ = 0;
+    Visit(node->GetInstruction());
+  }
+
+  uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
+  uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }
+
+ protected:
+  // The latency of the most recently visited SchedulingNode.
+  // This is for reporting the latency value to the user of this visitor.
+  uint32_t last_visited_latency_;
+  // This represents the time spent *within* the generated code for the most recently
+  // visited SchedulingNode. This is for reporting the internal latency value to the
+  // user of this visitor.
+  uint32_t last_visited_internal_latency_;
+};
+
+class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
+ public:
+  virtual SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+                                                 const SchedulingGraph& graph) = 0;
+  virtual ~SchedulingNodeSelector() {}
+ protected:
+  static void DeleteNodeAtIndex(ArenaVector<SchedulingNode*>* nodes, size_t index) {
+    (*nodes)[index] = nodes->back();
+    nodes->pop_back();
+  }
+};
+
+/*
+ * Select a `SchedulingNode` at random within the candidates.
+ */
+class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+  RandomSchedulingNodeSelector() : seed_(0) {
+    seed_ = static_cast<uint32_t>(NanoTime());
+    srand(seed_);
+  }
+
+  SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+                                         const SchedulingGraph& graph) OVERRIDE {
+    UNUSED(graph);
+    DCHECK(!nodes->empty());
+    size_t select = rand_r(&seed_) % nodes->size();
+    SchedulingNode* select_node = (*nodes)[select];
+    DeleteNodeAtIndex(nodes, select);
+    return select_node;
+  }
+
+  uint32_t seed_;
+};
+
+/*
+ * Select a `SchedulingNode` according to critical path information,
+ * with heuristics to favor certain instruction patterns like materialized conditions.
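+ * (For example, a materialized HCondition may be preferred over a node with a
+ * longer critical path, so that the condition is scheduled next to its user
+ * and its live range stays short.)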
+ */
+class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+  CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+
+  SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+                                         const SchedulingGraph& graph) OVERRIDE;
+
+ protected:
+  SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
+                                                  SchedulingNode* check) const;
+
+  SchedulingNode* SelectMaterializedCondition(ArenaVector<SchedulingNode*>* nodes,
+                                              const SchedulingGraph& graph) const;
+
+ private:
+  const SchedulingNode* prev_select_;
+};
+
+class HScheduler {
+ public:
+  HScheduler(ArenaAllocator* arena,
+             SchedulingLatencyVisitor* latency_visitor,
+             SchedulingNodeSelector* selector)
+      : arena_(arena),
+        latency_visitor_(latency_visitor),
+        selector_(selector),
+        only_optimize_loop_blocks_(true),
+        scheduling_graph_(this, arena),
+        candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
+  virtual ~HScheduler() {}
+
+  void Schedule(HGraph* graph);
+
+  void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }
+
+  // Instructions cannot be rescheduled across a scheduling barrier.
+  virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+ protected:
+  void Schedule(HBasicBlock* block);
+  void Schedule(SchedulingNode* scheduling_node);
+  void Schedule(HInstruction* instruction);
+
+  // Any instruction returning `false` via this method will prevent its
+  // containing basic block from being scheduled.
+  // This method is used to restrict scheduling to instructions that we know are
+  // safe to handle.
+  virtual bool IsSchedulable(const HInstruction* instruction) const;
+  bool IsSchedulable(const HBasicBlock* block) const;
+
+  void CalculateLatency(SchedulingNode* node) {
+    latency_visitor_->CalculateLatency(node);
+    node->SetLatency(latency_visitor_->GetLastVisitedLatency());
+    node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
+  }
+
+  ArenaAllocator* const arena_;
+  SchedulingLatencyVisitor* const latency_visitor_;
+  SchedulingNodeSelector* const selector_;
+  bool only_optimize_loop_blocks_;
+
+  // We instantiate the members below as part of this class to avoid
+  // instantiating them locally for every chunk scheduled.
+  SchedulingGraph scheduling_graph_;
+  // A pointer indicating where the next instruction to be scheduled will be inserted.
+  HInstruction* cursor_;
+  // The list of candidates for scheduling. A node becomes a candidate when all
+  // its successors have been scheduled.
+  ArenaVector<SchedulingNode*> candidates_; + + private: +  DISALLOW_COPY_AND_ASSIGN(HScheduler); +}; + +inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const { +  return scheduler_->IsSchedulingBarrier(instruction); +} + +class HInstructionScheduling : public HOptimization { + public: +  HInstructionScheduling(HGraph* graph, InstructionSet instruction_set) +      : HOptimization(graph, kInstructionScheduling), +        instruction_set_(instruction_set) {} + +  void Run() { +    Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); +  } +  void Run(bool only_optimize_loop_blocks, bool schedule_randomly); + +  static constexpr const char* kInstructionScheduling = "scheduler"; + +  const InstructionSet instruction_set_; + + private: +  DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling); +}; + +}  // namespace art + +#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_H_ diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc new file mode 100644 index 0000000000..e3701fbcb1 --- /dev/null +++ b/compiler/optimizing/scheduler_arm64.cc @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scheduler_arm64.h" +#include "code_generator_utils.h" + +namespace art { +namespace arm64 { + +void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) { +  last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType()) +      ? kArm64FloatingPointOpLatency +      : kArm64IntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight( +    HBitwiseNegatedRight* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64IntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp( +    HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64DataProcWithShifterOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( +    HIntermediateAddress* ATTRIBUTE_UNUSED) { +  // Although the code generated is a simple `add` instruction, we found through empirical results +  // that spacing it from its use in memory accesses was beneficial. +  last_visited_latency_ = kArm64IntegerOpLatency + 2; +} + +void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64MulIntegerLatency; +} + +void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) { +  if (!instruction->GetArray()->IsIntermediateAddress()) { +    // Take the intermediate address computation into account. 
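+    // Without an HIntermediateAddress input, the `base + data offset` address
+    // computation is generated along with the load itself, so it is modeled
+    // here as internal latency rather than as result latency.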
+    last_visited_internal_latency_ = kArm64IntegerOpLatency; +  } +  last_visited_latency_ = kArm64MemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64MemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64MemoryStoreLatency; +} + +void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +  last_visited_internal_latency_ = kArm64IntegerOpLatency; +  // Users do not use any data results. +  last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) { +  Primitive::Type type = instr->GetResultType(); +  switch (type) { +    case Primitive::kPrimFloat: +      last_visited_latency_ = kArm64DivFloatLatency; +      break; +    case Primitive::kPrimDouble: +      last_visited_latency_ = kArm64DivDoubleLatency; +      break; +    default: +      // Follow the code path used by code generation. +      if (instr->GetRight()->IsConstant()) { +        int64_t imm = Int64FromConstant(instr->GetRight()->AsConstant()); +        if (imm == 0) { +          last_visited_internal_latency_ = 0; +          last_visited_latency_ = 0; +        } else if (imm == 1 || imm == -1) { +          last_visited_internal_latency_ = 0; +          last_visited_latency_ = kArm64IntegerOpLatency; +        } else if (IsPowerOfTwo(AbsOrMin(imm))) { +          last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency; +          last_visited_latency_ = kArm64IntegerOpLatency; +        } else { +          DCHECK(imm <= -2 || imm >= 2); +          last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency; +          last_visited_latency_ = kArm64MulIntegerLatency; +        } +      } else { +        last_visited_latency_ = kArm64DivIntegerLatency; +      } +      break; +  } +} + +void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64MemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +  last_visited_internal_latency_ = kArm64CallInternalLatency; +  last_visited_latency_ = kArm64IntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +  last_visited_internal_latency_ = kArm64CallInternalLatency; +  last_visited_latency_ = kArm64CallLatency; +} + +void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +  last_visited_internal_latency_ = kArm64LoadStringInternalLatency; +  last_visited_latency_ = kArm64MemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) { +  last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType()) +      ? 
kArm64MulFloatingPointLatency +      : kArm64MulIntegerLatency; +} + +void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +  last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency; +  last_visited_latency_ = kArm64CallLatency; +} + +void SchedulingLatencyVisitorARM64::VisitNewInstance(HNewInstance* instruction) { +  if (instruction->IsStringAlloc()) { +    last_visited_internal_latency_ = 2 + kArm64MemoryLoadLatency + kArm64CallInternalLatency; +  } else { +    last_visited_internal_latency_ = kArm64CallInternalLatency; +  } +  last_visited_latency_ = kArm64CallLatency; +} + +void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) { +  if (Primitive::IsFloatingPointType(instruction->GetResultType())) { +    last_visited_internal_latency_ = kArm64CallInternalLatency; +    last_visited_latency_ = kArm64CallLatency; +  } else { +    // Follow the code path used by code generation. +    if (instruction->GetRight()->IsConstant()) { +      int64_t imm = Int64FromConstant(instruction->GetRight()->AsConstant()); +      if (imm == 0) { +        last_visited_internal_latency_ = 0; +        last_visited_latency_ = 0; +      } else if (imm == 1 || imm == -1) { +        last_visited_internal_latency_ = 0; +        last_visited_latency_ = kArm64IntegerOpLatency; +      } else if (IsPowerOfTwo(AbsOrMin(imm))) { +        last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency; +        last_visited_latency_ = kArm64IntegerOpLatency; +      } else { +        DCHECK(imm <= -2 || imm >= 2); +        last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency; +        last_visited_latency_ = kArm64MulIntegerLatency; +      } +    } else { +      last_visited_internal_latency_ = kArm64DivIntegerLatency; +      last_visited_latency_ = kArm64MulIntegerLatency; +    } +  } +} + +void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) { +  last_visited_latency_ = kArm64MemoryLoadLatency; +} + +void SchedulingLatencyVisitorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { +  HBasicBlock* block = instruction->GetBlock(); +  DCHECK((block->GetLoopInformation() != nullptr) || +         (block->IsEntryBlock() && instruction->GetNext()->IsGoto())); +  // Users do not use any data results. +  last_visited_latency_ = 0; +} + +void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) { +  if (Primitive::IsFloatingPointType(instr->GetResultType()) || +      Primitive::IsFloatingPointType(instr->GetInputType())) { +    last_visited_latency_ = kArm64TypeConversionFloatingPointIntegerLatency; +  } else { +    last_visited_latency_ = kArm64IntegerOpLatency; +  } +} + +}  // namespace arm64 +}  // namespace art diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h new file mode 100644 index 0000000000..702027c535 --- /dev/null +++ b/compiler/optimizing/scheduler_arm64.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ +#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ + +#include "scheduler.h" + +namespace art { +namespace arm64 { + +static constexpr uint32_t kArm64MemoryLoadLatency = 5; +static constexpr uint32_t kArm64MemoryStoreLatency = 3; + +static constexpr uint32_t kArm64CallInternalLatency = 10; +static constexpr uint32_t kArm64CallLatency = 5; + +// AArch64 instruction latency. +// We currently assume that all arm64 CPUs share the same instruction latency list. +static constexpr uint32_t kArm64IntegerOpLatency = 2; +static constexpr uint32_t kArm64FloatingPointOpLatency = 5; + + +static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3; +static constexpr uint32_t kArm64DivDoubleLatency = 30; +static constexpr uint32_t kArm64DivFloatLatency = 15; +static constexpr uint32_t kArm64DivIntegerLatency = 5; +static constexpr uint32_t kArm64LoadStringInternalLatency = 7; +static constexpr uint32_t kArm64MulFloatingPointLatency = 6; +static constexpr uint32_t kArm64MulIntegerLatency = 6; +static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5; + +class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { + public: +  // Default visitor for instructions not handled specifically below. +  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { +    last_visited_latency_ = kArm64IntegerOpLatency; +  } + +// We add a second unused parameter to be able to use this macro like the others +// defined in `nodes.h`. +#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ +  M(ArrayGet         , unused)                   \ +  M(ArrayLength      , unused)                   \ +  M(ArraySet         , unused)                   \ +  M(BinaryOperation  , unused)                   \ +  M(BoundsCheck      , unused)                   \ +  M(Div              , unused)                   \ +  M(InstanceFieldGet , unused)                   \ +  M(InstanceOf       , unused)                   \ +  M(Invoke           , unused)                   \ +  M(LoadString       , unused)                   \ +  M(Mul              , unused)                   \ +  M(NewArray         , unused)                   \ +  M(NewInstance      , unused)                   \ +  M(Rem              , unused)                   \ +  M(StaticFieldGet   , unused)                   \ +  M(SuspendCheck     , unused)                   \ +  M(TypeConversion   , unused) + +#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ +  M(BitwiseNegatedRight, unused)                 \ +  M(MultiplyAccumulate, unused)                  \ +  M(IntermediateAddress, unused) + +#define DECLARE_VISIT_INSTRUCTION(type, unused)  \ +  void Visit##type(H##type* instruction) OVERRIDE; + +  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) +  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) +  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION +}; + +class HSchedulerARM64 : public HScheduler { + public: +  HSchedulerARM64(ArenaAllocator* arena, SchedulingNodeSelector* selector) +      : HScheduler(arena, &arm64_latency_visitor_, selector) {} +  ~HSchedulerARM64() OVERRIDE {} + +  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { +#define CASE_INSTRUCTION_KIND(type, unused) case \ +  HInstruction::InstructionKind::k##type: +    switch (instruction->GetKind()) { +      
FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) +        return true; +      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND) +        return true; +      default: +        return HScheduler::IsSchedulable(instruction); +    } +#undef CASE_INSTRUCTION_KIND +  } + + private: +  SchedulingLatencyVisitorARM64 arm64_latency_visitor_; +  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64); +}; + +}  // namespace arm64 +}  // namespace art + +#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc new file mode 100644 index 0000000000..31d13e2a26 --- /dev/null +++ b/compiler/optimizing/scheduler_test.cc @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/arena_allocator.h" +#include "builder.h" +#include "codegen_test_utils.h" +#include "common_compiler_test.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "pc_relative_fixups_x86.h" +#include "register_allocator.h" +#include "scheduler.h" + +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "scheduler_arm64.h" +#endif + +namespace art { + +// Return all combinations of ISA and code generator that are executable on +// hardware, or on simulator, and that we'd like to test. 
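+// Each candidate pairs an InstructionSet with a codegen factory from
+// codegen_test_utils.h; candidates whose ISA cannot be executed in the current
+// environment are filtered out below via CanExecute().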
+static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { +  ::std::vector<CodegenTargetConfig> v; +  ::std::vector<CodegenTargetConfig> test_config_candidates = { +#ifdef ART_ENABLE_CODEGEN_arm +    CodegenTargetConfig(kArm, create_codegen_arm), +    CodegenTargetConfig(kThumb2, create_codegen_arm), +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 +    CodegenTargetConfig(kArm64, create_codegen_arm64), +#endif +#ifdef ART_ENABLE_CODEGEN_x86 +    CodegenTargetConfig(kX86, create_codegen_x86), +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 +    CodegenTargetConfig(kX86_64, create_codegen_x86_64), +#endif +#ifdef ART_ENABLE_CODEGEN_mips +    CodegenTargetConfig(kMips, create_codegen_mips), +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 +    CodegenTargetConfig(kMips64, create_codegen_mips64) +#endif +  }; + +  for (auto test_config : test_config_candidates) { +    if (CanExecute(test_config.GetInstructionSet())) { +      v.push_back(test_config); +    } +  } + +  return v; +} + +class SchedulerTest : public CommonCompilerTest {}; + +#ifdef ART_ENABLE_CODEGEN_arm64 +TEST_F(SchedulerTest, DependencyGraph) { +  ArenaPool pool; +  ArenaAllocator allocator(&pool); +  HGraph* graph = CreateGraph(&allocator); +  HBasicBlock* entry = new (&allocator) HBasicBlock(graph); +  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph); +  graph->AddBlock(entry); +  graph->AddBlock(block1); +  graph->SetEntryBlock(entry); + +  // entry: +  // array         ParameterValue +  // c1            IntConstant +  // c2            IntConstant +  // block1: +  // add1          Add [c1, c2] +  // add2          Add [add1, c2] +  // mul           Mul [add1, add2] +  // div_check     DivZeroCheck [add2] (env: add2, mul) +  // div           Div [add1, div_check] +  // array_get1    ArrayGet [array, add1] +  // array_set1    ArraySet [array, add1, add2] +  // array_get2    ArrayGet [array, add1] +  // array_set2    ArraySet [array, add1, add2] + +  HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(), +                                                         dex::TypeIndex(0), +                                                         0, +                                                         Primitive::kPrimNot); +  HInstruction* c1 = graph->GetIntConstant(1); +  HInstruction* c2 = graph->GetIntConstant(10); +  HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2); +  HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2); +  HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2); +  HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0); +  HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0); +  HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); +  HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); +  HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0); +  HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + +  DCHECK(div_check->CanThrow()); + +  entry->AddInstruction(array); + +  HInstruction* block_instructions[] = {add1, +                                        add2, +                                        mul, +                                        div_check, +                                        div, +                                        array_get1, +                                        array_set1, +                         
               array_get2,
+                                        array_set2};
+  for (auto instr : block_instructions) {
+    block1->AddInstruction(instr);
+  }
+
+  HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
+                                                            2,
+                                                            graph->GetArtMethod(),
+                                                            0,
+                                                            div_check);
+  div_check->SetRawEnvironment(environment);
+  environment->SetRawEnvAt(0, add2);
+  add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+  environment->SetRawEnvAt(1, mul);
+  mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+  ArenaAllocator* arena = graph->GetArena();
+  CriticalPathSchedulingNodeSelector critical_path_selector;
+  arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
+  SchedulingGraph scheduling_graph(&scheduler, arena);
+  // Instructions must be inserted in reverse order into the scheduling graph.
+  for (auto instr : ReverseRange(block_instructions)) {
+    scheduling_graph.AddNode(instr);
+  }
+
+  // Dependencies should not cross basic blocks.
+  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+  // Define-use dependency.
+  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+  ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+  ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+  // Read and write dependencies.
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+  // Env dependency.
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+  ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+  // CanThrow.
+  ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+}
+#endif
+
+static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
+                                             bool has_result,
+                                             int expected) {
+  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    HGraph* graph = CreateCFG(&arena, data);
+
+    // Schedule the graph randomly.
+    HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+    scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+    RunCode(target_config,
+            graph,
+            [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+            has_result, expected);
+  }
+}
+
+TEST_F(SchedulerTest, RandomScheduling) {
+  //
+  // Java source: code crafted to make sure that (random) scheduling yields the correct result.
+  //
+  //  int result = 0;
+  //  float fr = 10.0f;
+  //  for (int i = 1; i < 10; i++) {
+  //    fr++;
+  //    int t1 = result >> i;
+  //    int t2 = result * i;
+  //    result = result + t1 - t2;
+  //    fr = fr / i;
+  //    result += (int)fr;
+  //  }
+  //  return result;
+  //
+  const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 << 12 | 2 << 8,          // const/4 v2, #int 0
+    Instruction::CONST_HIGH16 | 0 << 8, 0x4120,       // const/high16 v0, #float 10.0 // #41200000
+    Instruction::CONST_4 | 1 << 12 | 1 << 8,          // const/4 v1, #int 1
+    Instruction::CONST_16 | 5 << 8, 0x000a,           // const/16 v5, #int 10
+    Instruction::IF_GE | 5 << 12 | 1 << 8, 0x0014,    // if-ge v1, v5, 001a // +0014
+    Instruction::CONST_HIGH16 | 5 << 8, 0x3f80,       // const/high16 v5, #float 1.0 // #3f800000
+    Instruction::ADD_FLOAT_2ADDR | 5 << 12 | 0 << 8,  // add-float/2addr v0, v5
+    Instruction::SHR_INT | 3 << 8, 1 << 8 | 2,        // shr-int v3, v2, v1
+    Instruction::MUL_INT | 4 << 8, 1 << 8 | 2,        // mul-int v4, v2, v1
+    Instruction::ADD_INT | 5 << 8, 3 << 8 | 2,        // add-int v5, v2, v3
+    Instruction::SUB_INT | 2 << 8, 4 << 8 | 5,        // sub-int v2, v5, v4
+    Instruction::INT_TO_FLOAT | 1 << 12 | 5 << 8,     // int-to-float v5, v1
+    Instruction::DIV_FLOAT_2ADDR | 5 << 12 | 0 << 8,  // div-float/2addr v0, v5
+    Instruction::FLOAT_TO_INT | 0 << 12 | 5 << 8,     // float-to-int v5, v0
+    Instruction::ADD_INT_2ADDR | 5 << 12 | 2 << 8,    // add-int/2addr v2, v5
+    Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1,   // add-int/lit8 v1, v1, #int 1 // #01
+    Instruction::GOTO | 0xeb << 8,                    // goto 0004 // -0015
+    Instruction::RETURN | 2 << 8);                    // return v2
+
+  constexpr int kNumberOfRuns = 10;
+  for (int i = 0; i < kNumberOfRuns; ++i) {
+    CompileWithRandomSchedulerAndRun(data, true, 138774);
+  }
+}
+
+}  // namespace art