Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/codegen_test.cc        | 331
-rw-r--r--  compiler/optimizing/codegen_test_utils.h   | 355
-rw-r--r--  compiler/optimizing/nodes.cc               |  16
-rw-r--r--  compiler/optimizing/nodes.h                |   4
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc |   6
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h |   3
-rw-r--r--  compiler/optimizing/scheduler.cc           | 610
-rw-r--r--  compiler/optimizing/scheduler.h            | 487
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc     | 196
-rw-r--r--  compiler/optimizing/scheduler_arm64.h      | 117
-rw-r--r--  compiler/optimizing/scheduler_test.cc      | 238
11 files changed, 2023 insertions, 340 deletions
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e3f3df0ff5..d1315091f3 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -17,30 +17,15 @@
#include <functional>
#include <memory>
-#include "arch/instruction_set.h"
-#include "arch/arm/instruction_set_features_arm.h"
-#include "arch/arm/registers_arm.h"
-#include "arch/arm64/instruction_set_features_arm64.h"
-#include "arch/mips/instruction_set_features_mips.h"
-#include "arch/mips/registers_mips.h"
-#include "arch/mips64/instruction_set_features_mips64.h"
-#include "arch/mips64/registers_mips64.h"
-#include "arch/x86/instruction_set_features_x86.h"
-#include "arch/x86/registers_x86.h"
-#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_simulator_container.h"
-#include "common_compiler_test.h"
+#include "codegen_test_utils.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "driver/compiler_options.h"
-#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-#include "prepare_for_register_allocation.h"
#include "register_allocator_linear_scan.h"
-#include "ssa_liveness_analysis.h"
#include "utils.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
@@ -48,324 +33,10 @@
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
-#ifdef ART_ENABLE_CODEGEN_arm
-#include "code_generator_arm.h"
-#include "code_generator_arm_vixl.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-#include "code_generator_arm64.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-#include "code_generator_x86.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86_64
-#include "code_generator_x86_64.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips
-#include "code_generator_mips.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips64
-#include "code_generator_mips64.h"
-#endif
-
#include "gtest/gtest.h"
namespace art {
-typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
-
-class CodegenTargetConfig {
- public:
- CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen)
- : isa_(isa), create_codegen_(create_codegen) {
- }
- InstructionSet GetInstructionSet() const { return isa_; }
- CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) {
- return create_codegen_(graph, compiler_options);
- }
-
- private:
- CodegenTargetConfig() {}
- InstructionSet isa_;
- CreateCodegenFn create_codegen_;
-};
-
-#ifdef ART_ENABLE_CODEGEN_arm
-// Provide our own codegen, that ensures the C calling conventions
-// are preserved. Currently, ART and C do not match as R4 is caller-save
-// in ART, and callee-save in C. Alternatively, we could use or write
-// the stub that saves and restores all registers, but it is easier
-// to just overwrite the code generator.
-class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
- public:
- TestCodeGeneratorARM(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm::CodeGeneratorARM(graph, isa_features, compiler_options) {
- AddAllocatedRegister(Location::RegisterLocation(arm::R6));
- AddAllocatedRegister(Location::RegisterLocation(arm::R7));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- arm::CodeGeneratorARM::SetupBlockedRegisters();
- blocked_core_registers_[arm::R4] = true;
- blocked_core_registers_[arm::R6] = false;
- blocked_core_registers_[arm::R7] = false;
- }
-};
-
-// A way to test the VIXL32-based code generator on ARM. This will replace
-// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one.
-class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
- public:
- TestCodeGeneratorARMVIXL(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
- AddAllocatedRegister(Location::RegisterLocation(arm::R6));
- AddAllocatedRegister(Location::RegisterLocation(arm::R7));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
- blocked_core_registers_[arm::R4] = true;
- blocked_core_registers_[arm::R6] = false;
- blocked_core_registers_[arm::R7] = false;
- }
-};
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
- public:
- TestCodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : x86::CodeGeneratorX86(graph, isa_features, compiler_options) {
- // Save edi, we need it for getting enough registers for long multiplication.
- AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- x86::CodeGeneratorX86::SetupBlockedRegisters();
- // ebx is a callee-save register in C, but caller-save for ART.
- blocked_core_registers_[x86::EBX] = true;
-
- // Make edi available.
- blocked_core_registers_[x86::EDI] = false;
- }
-};
-#endif
-
-class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() : size_(0) { }
-
- virtual uint8_t* Allocate(size_t size) {
- size_ = size;
- memory_.reset(new uint8_t[size]);
- return memory_.get();
- }
-
- size_t GetSize() const { return size_; }
- uint8_t* GetMemory() const { return memory_.get(); }
-
- private:
- size_t size_;
- std::unique_ptr<uint8_t[]> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
-};
-
-static bool CanExecuteOnHardware(InstructionSet target_isa) {
- return (target_isa == kRuntimeISA)
- // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
- || (kRuntimeISA == kArm && target_isa == kThumb2);
-}
-
-static bool CanExecute(InstructionSet target_isa) {
- CodeSimulatorContainer simulator(target_isa);
- return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
-}
-
-template <typename Expected>
-static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
-
-template <>
-bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnBool();
-}
-
-template <>
-int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnInt32();
-}
-
-template <>
-int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnInt64();
-}
-
-template <typename Expected>
-static void VerifyGeneratedCode(InstructionSet target_isa,
- Expected (*f)(),
- bool has_result,
- Expected expected) {
- ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
-
- // Verify on simulator.
- CodeSimulatorContainer simulator(target_isa);
- if (simulator.CanSimulate()) {
- Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
- if (has_result) {
- ASSERT_EQ(expected, result);
- }
- }
-
- // Verify on hardware.
- if (CanExecuteOnHardware(target_isa)) {
- Expected result = f();
- if (has_result) {
- ASSERT_EQ(expected, result);
- }
- }
-}
-
-template <typename Expected>
-static void Run(const InternalCodeAllocator& allocator,
- const CodeGenerator& codegen,
- bool has_result,
- Expected expected) {
- InstructionSet target_isa = codegen.GetInstructionSet();
-
- typedef Expected (*fptr)();
- CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
- fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
- if (target_isa == kThumb2) {
- // For thumb we need the bottom bit set.
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
- }
- VerifyGeneratedCode(target_isa, f, has_result, expected);
-}
-
-static void ValidateGraph(HGraph* graph) {
- GraphChecker graph_checker(graph);
- graph_checker.Run();
- if (!graph_checker.IsValid()) {
- for (const auto& error : graph_checker.GetErrors()) {
- std::cout << error << std::endl;
- }
- }
- ASSERT_TRUE(graph_checker.IsValid());
-}
-
-template <typename Expected>
-static void RunCodeNoCheck(CodeGenerator* codegen,
- HGraph* graph,
- const std::function<void(HGraph*)>& hook_before_codegen,
- bool has_result,
- Expected expected) {
- SsaLivenessAnalysis liveness(graph, codegen);
- PrepareForRegisterAllocation(graph).Run();
- liveness.Analyze();
- RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
- hook_before_codegen(graph);
- InternalCodeAllocator allocator;
- codegen->Compile(&allocator);
- Run(allocator, *codegen, has_result, expected);
-}
-
-template <typename Expected>
-static void RunCode(CodeGenerator* codegen,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- ValidateGraph(graph);
- RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected);
-}
-
-template <typename Expected>
-static void RunCode(CodegenTargetConfig target_config,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- CompilerOptions compiler_options;
- std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
- RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
-}
-
-#ifdef ART_ENABLE_CODEGEN_arm
-CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) TestCodeGeneratorARM(graph,
- *features_arm.get(),
- compiler_options);
-}
-
-CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
- *features_arm64.get(),
- compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86_64
-CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips
-CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips64
-CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
-}
-#endif
-
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
new file mode 100644
index 0000000000..cd954043f5
--- /dev/null
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm/registers_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/instruction_set.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips/registers_mips.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/mips64/registers_mips64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86/registers_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "code_simulator_container.h"
+#include "common_compiler_test.h"
+#include "graph_checker.h"
+#include "prepare_for_register_allocation.h"
+#include "ssa_liveness_analysis.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#include "code_generator_arm_vixl.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
+namespace art {
+
+typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
+
+class CodegenTargetConfig {
+ public:
+ CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen)
+ : isa_(isa), create_codegen_(create_codegen) {
+ }
+ InstructionSet GetInstructionSet() const { return isa_; }
+ CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) {
+ return create_codegen_(graph, compiler_options);
+ }
+
+ private:
+ CodegenTargetConfig() {}
+ InstructionSet isa_;
+ CreateCodegenFn create_codegen_;
+};
+
+#ifdef ART_ENABLE_CODEGEN_arm
+// Provide our own codegen that ensures the C calling conventions
+// are preserved. Currently, ART and C do not match as R4 is caller-save
+// in ART, and callee-save in C. Alternatively, we could use or write
+// the stub that saves and restores all registers, but it is easier
+// to just overwrite the code generator.
+class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
+ public:
+ TestCodeGeneratorARM(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARM(graph, isa_features, compiler_options) {
+ AddAllocatedRegister(Location::RegisterLocation(arm::R6));
+ AddAllocatedRegister(Location::RegisterLocation(arm::R7));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARM::SetupBlockedRegisters();
+ blocked_core_registers_[arm::R4] = true;
+ blocked_core_registers_[arm::R6] = false;
+ blocked_core_registers_[arm::R7] = false;
+ }
+};
+
+// A way to test the VIXL32-based code generator on ARM. This will replace
+// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one.
+class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
+ public:
+ TestCodeGeneratorARMVIXL(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
+ AddAllocatedRegister(Location::RegisterLocation(arm::R6));
+ AddAllocatedRegister(Location::RegisterLocation(arm::R7));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
+ blocked_core_registers_[arm::R4] = true;
+ blocked_core_registers_[arm::R6] = false;
+ blocked_core_registers_[arm::R7] = false;
+ }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
+ public:
+ TestCodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : x86::CodeGeneratorX86(graph, isa_features, compiler_options) {
+ // Save edi; we need it to get enough registers for long multiplication.
+ AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ x86::CodeGeneratorX86::SetupBlockedRegisters();
+ // ebx is a callee-save register in C, but caller-save for ART.
+ blocked_core_registers_[x86::EBX] = true;
+
+ // Make edi available.
+ blocked_core_registers_[x86::EDI] = false;
+ }
+};
+#endif
+
+class InternalCodeAllocator : public CodeAllocator {
+ public:
+ InternalCodeAllocator() : size_(0) { }
+
+ virtual uint8_t* Allocate(size_t size) {
+ size_ = size;
+ memory_.reset(new uint8_t[size]);
+ return memory_.get();
+ }
+
+ size_t GetSize() const { return size_; }
+ uint8_t* GetMemory() const { return memory_.get(); }
+
+ private:
+ size_t size_;
+ std::unique_ptr<uint8_t[]> memory_;
+
+ DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+};
+
+static bool CanExecuteOnHardware(InstructionSet target_isa) {
+ return (target_isa == kRuntimeISA)
+ // Handle the special case of ARM, with two instruction sets (ARM32 and Thumb-2).
+ || (kRuntimeISA == kArm && target_isa == kThumb2);
+}
+
+static bool CanExecute(InstructionSet target_isa) {
+ CodeSimulatorContainer simulator(target_isa);
+ return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
+}
+
+template <typename Expected>
+inline static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
+
+template <>
+inline bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnBool();
+}
+
+template <>
+inline int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnInt32();
+}
+
+template <>
+inline int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnInt64();
+}
+
+template <typename Expected>
+static void VerifyGeneratedCode(InstructionSet target_isa,
+ Expected (*f)(),
+ bool has_result,
+ Expected expected) {
+ ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
+
+ // Verify on simulator.
+ CodeSimulatorContainer simulator(target_isa);
+ if (simulator.CanSimulate()) {
+ Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
+ if (has_result) {
+ ASSERT_EQ(expected, result);
+ }
+ }
+
+ // Verify on hardware.
+ if (CanExecuteOnHardware(target_isa)) {
+ Expected result = f();
+ if (has_result) {
+ ASSERT_EQ(expected, result);
+ }
+ }
+}
+
+template <typename Expected>
+static void Run(const InternalCodeAllocator& allocator,
+ const CodeGenerator& codegen,
+ bool has_result,
+ Expected expected) {
+ InstructionSet target_isa = codegen.GetInstructionSet();
+
+ typedef Expected (*fptr)();
+ CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
+ fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+ if (target_isa == kThumb2) {
+ // For thumb we need the bottom bit set.
+ f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+ }
+ VerifyGeneratedCode(target_isa, f, has_result, expected);
+}
+
+static void ValidateGraph(HGraph* graph) {
+ GraphChecker graph_checker(graph);
+ graph_checker.Run();
+ if (!graph_checker.IsValid()) {
+ for (const auto& error : graph_checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ }
+ ASSERT_TRUE(graph_checker.IsValid());
+}
+
+template <typename Expected>
+static void RunCodeNoCheck(CodeGenerator* codegen,
+ HGraph* graph,
+ const std::function<void(HGraph*)>& hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ SsaLivenessAnalysis liveness(graph, codegen);
+ PrepareForRegisterAllocation(graph).Run();
+ liveness.Analyze();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+ hook_before_codegen(graph);
+ InternalCodeAllocator allocator;
+ codegen->Compile(&allocator);
+ Run(allocator, *codegen, has_result, expected);
+}
+
+template <typename Expected>
+static void RunCode(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ ValidateGraph(graph);
+ RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected);
+}
+
+template <typename Expected>
+static void RunCode(CodegenTargetConfig target_config,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ CompilerOptions compiler_options;
+ std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+ RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
+}
+
+#ifdef ART_ENABLE_CODEGEN_arm
+CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorARM(graph,
+ *features_arm.get(),
+ compiler_options);
+}
+
+CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
+ *features_arm64.get(),
+ compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+ MipsInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
+ Mips64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
+}
+#endif
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d15145e673..76900f23a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1354,13 +1354,15 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind&
return os;
}
-void HInstruction::MoveBefore(HInstruction* cursor) {
- DCHECK(!IsPhi());
- DCHECK(!IsControlFlow());
- DCHECK(CanBeMoved() ||
- // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
- IsShouldDeoptimizeFlag());
- DCHECK(!cursor->IsPhi());
+void HInstruction::MoveBefore(HInstruction* cursor, bool do_checks) {
+ if (do_checks) {
+ DCHECK(!IsPhi());
+ DCHECK(!IsControlFlow());
+ DCHECK(CanBeMoved() ||
+ // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
+ IsShouldDeoptimizeFlag());
+ DCHECK(!cursor->IsPhi());
+ }
next_->previous_ = previous_;
if (previous_ != nullptr) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f0ea9e20e6..acf14aa726 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2065,8 +2065,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
other->ReplaceInput(this, use_index);
}
- // Move `this` instruction before `cursor`.
- void MoveBefore(HInstruction* cursor);
+ // Move `this` instruction before `cursor`.
+ void MoveBefore(HInstruction* cursor, bool do_checks = true);
// Move `this` before its first user and out of any loops. If there is no
// out-of-loop user that dominates all other users, move the instruction
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 297500b12f..1ab671022b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -90,6 +90,7 @@
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
#include "select_generator.h"
+#include "scheduler.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
#include "ssa_builder.h"
@@ -658,10 +659,13 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
new (arena) arm64::InstructionSimplifierArm64(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+ HInstructionScheduling* scheduling =
+ new (arena) HInstructionScheduling(graph, instruction_set);
HOptimization* arm64_optimizations[] = {
simplifier,
side_effects,
- gvn
+ gvn,
+ scheduling,
};
RunOptimizations(arm64_optimizations, arraysize(arm64_optimizations), pass_observer);
break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 58d90176cd..bf963b8996 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -64,6 +64,9 @@ LiveInterval* BuildInterval(const size_t ranges[][2],
void RemoveSuspendChecks(HGraph* graph) {
for (HBasicBlock* block : graph->GetBlocks()) {
if (block != nullptr) {
+ if (block->GetLoopInformation() != nullptr) {
+ block->GetLoopInformation()->SetSuspendCheck(nullptr);
+ }
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (current->IsSuspendCheck()) {
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
new file mode 100644
index 0000000000..d65d20cf43
--- /dev/null
+++ b/compiler/optimizing/scheduler.cc
@@ -0,0 +1,610 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+
+#include "prepare_for_register_allocation.h"
+#include "scheduler.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "scheduler_arm64.h"
+#endif
+
+namespace art {
+
+void SchedulingGraph::AddDependency(SchedulingNode* node,
+ SchedulingNode* dependency,
+ bool is_data_dependency) {
+ if (node == nullptr || dependency == nullptr) {
+ // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
+ // another block), so we do not need to add a dependency edge to the graph.
+ return;
+ }
+
+ if (is_data_dependency) {
+ if (!HasImmediateDataDependency(node, dependency)) {
+ node->AddDataPredecessor(dependency);
+ }
+ } else if (!HasImmediateOtherDependency(node, dependency)) {
+ node->AddOtherPredecessor(dependency);
+ }
+}
+
+static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
+ // Read after write.
+ if (node.MayDependOn(other)) {
+ return true;
+ }
+
+ // Write after read.
+ if (other.MayDependOn(node)) {
+ return true;
+ }
+
+ // Memory write after write.
+ if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
+ return true;
+ }
+
+ return false;
+}
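The three checks above are the classic memory hazards: read-after-write, write-after-read, and write-after-write. As an illustration, here is a minimal standalone sketch of the same decision logic, using an invented two-bit side-effect summary rather than ART's real SideEffects encoding:

    #include <iostream>

    // Toy side-effect summary: one bit for "reads memory", one for "writes memory".
    struct ToySideEffects {
      bool reads;
      bool writes;
      // `this` may depend on `other` if it reads what `other` writes.
      bool MayDependOn(ToySideEffects other) const { return reads && other.writes; }
      bool DoesAnyWrite() const { return writes; }
    };

    // Same structure as MayHaveReorderingDependency() above.
    bool MayHaveReorderingDependency(ToySideEffects node, ToySideEffects other) {
      return node.MayDependOn(other)                        // read after write
          || other.MayDependOn(node)                        // write after read
          || (node.DoesAnyWrite() && other.DoesAnyWrite()); // write after write
    }

    int main() {
      ToySideEffects load{/*reads=*/ true, /*writes=*/ false};
      ToySideEffects store{/*reads=*/ false, /*writes=*/ true};
      std::cout << MayHaveReorderingDependency(load, store)   // 1: read after write
                << MayHaveReorderingDependency(store, load)   // 1: write after read
                << MayHaveReorderingDependency(store, store)  // 1: write after write
                << MayHaveReorderingDependency(load, load)    // 0: reads may be reordered
                << '\n';
    }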
+
+
+// Check whether `node` depends on `other`, taking into account `SideEffects`
+// and `CanThrow` information.
+static bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) {
+ if (MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
+ return true;
+ }
+
+ if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
+ return true;
+ }
+
+ if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
+ return true;
+ }
+
+ if (other->CanThrow() && node->CanThrow()) {
+ return true;
+ }
+
+ // Check side-effect dependency between ArrayGet and BoundsCheck.
+ if (node->IsArrayGet() && other->IsBoundsCheck() && node->InputAt(1) == other) {
+ return true;
+ }
+
+ return false;
+}
+
+void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
+ SchedulingNode* instruction_node = GetNode(instruction);
+
+ // Define-use dependencies.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ AddDataDependency(GetNode(use.GetUser()), instruction_node);
+ }
+
+ // Scheduling barrier dependencies.
+ DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
+ if (contains_scheduling_barrier_) {
+ // A barrier depends on the instructions after it, and the instructions
+ // before the barrier depend on it.
+ for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
+ SchedulingNode* other_node = GetNode(other);
+ bool other_is_barrier = other_node->IsSchedulingBarrier();
+ if (is_scheduling_barrier || other_is_barrier) {
+ AddOtherDependency(other_node, instruction_node);
+ }
+ if (other_is_barrier) {
+ // This other scheduling barrier guarantees ordering of instructions after
+ // it, so avoid creating additional useless dependencies in the graph.
+ // For example if we have
+ // instr_1
+ // barrier_2
+ // instr_3
+ // barrier_4
+ // instr_5
+ // we only create the following non-data dependencies
+ // 1 -> 2
+ // 2 -> 3
+ // 2 -> 4
+ // 3 -> 4
+ // 4 -> 5
+ // and do not create
+ // 1 -> 4
+ // 2 -> 5
+ // Note that in this example we could also avoid creating the dependency
+ // `2 -> 4`. But if we remove `instr_3` that dependency is required to
+ // order the barriers. So we generate it to avoid a special case.
+ break;
+ }
+ }
+ }
+
+ // Side effect dependencies.
+ if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
+ for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
+ SchedulingNode* other_node = GetNode(other);
+ if (other_node->IsSchedulingBarrier()) {
+ // We have reached a scheduling barrier so we can stop further
+ // processing.
+ DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
+ break;
+ }
+ if (HasSideEffectDependency(other, instruction)) {
+ AddOtherDependency(other_node, instruction_node);
+ }
+ }
+ }
+
+ // Environment dependencies.
+ // We do not need to process those if the instruction is a scheduling barrier,
+ // since the barrier already has non-data dependencies on all following
+ // instructions.
+ if (!is_scheduling_barrier) {
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ // Note that here we could stop processing if the environment holder is
+ // across a scheduling barrier. But checking this would likely require
+ // more work than simply iterating through environment uses.
+ AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
+ }
+ }
+}
+
+bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
+ const SchedulingNode* other) const {
+ return ContainsElement(node->GetDataPredecessors(), other);
+}
+
+bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
+ const HInstruction* other_instruction) const {
+ const SchedulingNode* node = GetNode(instruction);
+ const SchedulingNode* other = GetNode(other_instruction);
+ if (node == nullptr || other == nullptr) {
+ // Both instructions must be in the current basic block, i.e. the SchedulingGraph
+ // must contain their corresponding SchedulingNodes, for it to tell whether there is
+ // a dependency. Otherwise (for example when `instruction` and `other_instruction`
+ // are in different basic blocks) there is no dependency from its perspective.
+ return false;
+ }
+ return HasImmediateDataDependency(node, other);
+}
+
+bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
+ const SchedulingNode* other) const {
+ return ContainsElement(node->GetOtherPredecessors(), other);
+}
+
+bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
+ const HInstruction* other_instruction) const {
+ const SchedulingNode* node = GetNode(instruction);
+ const SchedulingNode* other = GetNode(other_instruction);
+ if (node == nullptr || other == nullptr) {
+ // Both instructions must be in the current basic block, i.e. the SchedulingGraph
+ // must contain their corresponding SchedulingNodes, for it to tell whether there is
+ // a dependency. Otherwise (for example when `instruction` and `other_instruction`
+ // are in different basic blocks) there is no dependency from its perspective.
+ return false;
+ }
+ return HasImmediateOtherDependency(node, other);
+}
+
+static const std::string InstructionTypeId(const HInstruction* instruction) {
+ std::string id;
+ Primitive::Type type = instruction->GetType();
+ if (type == Primitive::kPrimNot) {
+ id.append("l");
+ } else {
+ id.append(Primitive::Descriptor(instruction->GetType()));
+ }
+ // Use lower-case to be closer to the `HGraphVisualizer` output.
+ id[0] = std::tolower(id[0]);
+ id.append(std::to_string(instruction->GetId()));
+ return id;
+}
+
+// Ideally we would reuse the graph visualizer code, but it is not available
+// from here and it is not worth moving all that code only for our use.
+static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
+ const HInstruction* instruction = node->GetInstruction();
+ // Use the instruction typed id as the node identifier.
+ std::string instruction_id = InstructionTypeId(instruction);
+ output << instruction_id << "[shape=record, label=\""
+ << instruction_id << ' ' << instruction->DebugName() << " [";
+ // List the instruction's inputs in its description. When visualizing the
+ // graph this helps differentiate data inputs from other dependencies.
+ const char* separator = "";
+ for (const HInstruction* input : instruction->GetInputs()) {
+ output << separator << InstructionTypeId(input);
+ separator = ",";
+ }
+ output << "]";
+ // Other properties of the node.
+ output << "\\ninternal_latency: " << node->GetInternalLatency();
+ output << "\\ncritical_path: " << node->GetCriticalPath();
+ if (node->IsSchedulingBarrier()) {
+ output << "\\n(barrier)";
+ }
+ output << "\"];\n";
+ // We want program order to go from top to bottom in the graph output, so we
+ // reverse the edges and specify `dir=back`.
+ for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
+ const HInstruction* predecessor_instruction = predecessor->GetInstruction();
+ output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
+ << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
+ }
+ for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
+ const HInstruction* predecessor_instruction = predecessor->GetInstruction();
+ output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
+ << "[dir=back,color=blue]\n";
+ }
+}
+
+void SchedulingGraph::DumpAsDotGraph(const std::string& description,
+ const ArenaVector<SchedulingNode*>& initial_candidates) {
+ // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
+ // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
+ std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
+ // Description of this graph, as a comment.
+ output << "// " << description << "\n";
+ // Start the dot graph. Use an increasing index for easier differentiation.
+ output << "digraph G {\n";
+ for (const auto& entry : nodes_map_) {
+ DumpAsDotNode(output, entry.second);
+ }
+ // Create a fake 'end_of_scheduling' node to help visualize critical paths.
+ for (auto node : initial_candidates) {
+ const HInstruction* instruction = node->GetInstruction();
+ output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
+ << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
+ }
+ // End of the dot graph.
+ output << "}\n";
+ output.close();
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
+ ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
+ // Schedule condition inputs that can be materialized immediately before their use.
+ // In the following example, after we have scheduled HSelect, we want LessThan to be
+ // scheduled immediately, because it is a materialized condition and will be emitted
+ // right before HSelect in the codegen phase.
+ //
+ // i20 HLessThan [...] HLessThan HAdd HAdd
+ // i21 HAdd [...] ===> | | |
+ // i22 HAdd [...] +----------+---------+
+ // i23 HSelect [i21, i22, i20] HSelect
+
+ if (prev_select_ == nullptr) {
+ return nullptr;
+ }
+
+ const HInstruction* instruction = prev_select_->GetInstruction();
+ const HCondition* condition = nullptr;
+ DCHECK(instruction != nullptr);
+
+ if (instruction->IsIf()) {
+ condition = instruction->AsIf()->InputAt(0)->AsCondition();
+ } else if (instruction->IsSelect()) {
+ condition = instruction->AsSelect()->GetCondition()->AsCondition();
+ }
+
+ SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
+
+ if ((condition_node != nullptr) &&
+ condition->HasOnlyOneNonEnvironmentUse() &&
+ ContainsElement(*nodes, condition_node)) {
+ DCHECK(!condition_node->HasUnscheduledSuccessors());
+ // Remove the condition from the list of candidates and schedule it.
+ RemoveElement(*nodes, condition_node);
+ return condition_node;
+ }
+
+ return nullptr;
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
+ ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
+ DCHECK(!nodes->empty());
+ SchedulingNode* select_node = nullptr;
+
+ // Optimize for the materialized condition and its emit-before-use scenario.
+ select_node = SelectMaterializedCondition(nodes, graph);
+
+ if (select_node == nullptr) {
+ // Get highest priority node based on critical path information.
+ select_node = (*nodes)[0];
+ size_t select = 0;
+ for (size_t i = 1, e = nodes->size(); i < e; i++) {
+ SchedulingNode* check = (*nodes)[i];
+ SchedulingNode* candidate = (*nodes)[select];
+ select_node = GetHigherPrioritySchedulingNode(candidate, check);
+ if (select_node == check) {
+ select = i;
+ }
+ }
+ DeleteNodeAtIndex(nodes, select);
+ }
+
+ prev_select_ = select_node;
+ return select_node;
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
+ SchedulingNode* candidate, SchedulingNode* check) const {
+ uint32_t candidate_path = candidate->GetCriticalPath();
+ uint32_t check_path = check->GetCriticalPath();
+ // First look at the critical_path.
+ if (check_path != candidate_path) {
+ return check_path < candidate_path ? check : candidate;
+ }
+ // If both critical paths are equal, schedule instructions with a higher latency
+ // first in program order.
+ return check->GetLatency() < candidate->GetLatency() ? check : candidate;
+}
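Note the direction of the comparison: because scheduling proceeds backwards, the node picked first is placed latest in program order, so the selector prefers the candidate with the smaller remaining critical path and, on a tie, the smaller latency (which places higher-latency instructions earlier in program order, as the comment above says). A self-contained sketch with toy nodes, not the ART classes:

    #include <iostream>

    struct Node { const char* name; unsigned critical_path; unsigned latency; };

    // Mirrors the comparison in GetHigherPrioritySchedulingNode() above.
    const Node* HigherPriority(const Node* candidate, const Node* check) {
      if (check->critical_path != candidate->critical_path) {
        return check->critical_path < candidate->critical_path ? check : candidate;
      }
      return check->latency < candidate->latency ? check : candidate;
    }

    int main() {
      Node a{"a", /*critical_path=*/ 7, /*latency=*/ 3};
      Node b{"b", /*critical_path=*/ 5, /*latency=*/ 4};
      Node c{"c", /*critical_path=*/ 7, /*latency=*/ 1};
      std::cout << HigherPriority(&a, &b)->name << '\n';  // "b": shorter critical path wins
      std::cout << HigherPriority(&a, &c)->name << '\n';  // "c": tie broken by lower latency
    }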
+
+void HScheduler::Schedule(HGraph* graph) {
+ for (HBasicBlock* block : graph->GetReversePostOrder()) {
+ if (IsSchedulable(block)) {
+ Schedule(block);
+ }
+ }
+}
+
+void HScheduler::Schedule(HBasicBlock* block) {
+ ArenaVector<SchedulingNode*> scheduling_nodes(arena_->Adapter(kArenaAllocScheduler));
+
+ // Build the scheduling graph.
+ scheduling_graph_.Clear();
+ for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction));
+ CalculateLatency(node);
+ scheduling_nodes.push_back(node);
+ }
+
+ if (scheduling_graph_.Size() <= 1) {
+ scheduling_graph_.Clear();
+ return;
+ }
+
+ cursor_ = block->GetLastInstruction();
+
+ // Find the initial candidates for scheduling.
+ candidates_.clear();
+ for (SchedulingNode* node : scheduling_nodes) {
+ if (!node->HasUnscheduledSuccessors()) {
+ node->MaybeUpdateCriticalPath(node->GetLatency());
+ candidates_.push_back(node);
+ }
+ }
+
+ ArenaVector<SchedulingNode*> initial_candidates(arena_->Adapter(kArenaAllocScheduler));
+ if (kDumpDotSchedulingGraphs) {
+ // Remember the list of initial candidates for debug output purposes.
+ initial_candidates.assign(candidates_.begin(), candidates_.end());
+ }
+
+ // Schedule all nodes.
+ while (!candidates_.empty()) {
+ Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_));
+ }
+
+ if (kDumpDotSchedulingGraphs) {
+ // Dump the graph in `dot` format.
+ HGraph* graph = block->GetGraph();
+ std::stringstream description;
+ description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
+ << " B" << block->GetBlockId();
+ scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates);
+ }
+}
+
+void HScheduler::Schedule(SchedulingNode* scheduling_node) {
+ // Check whether any of the node's predecessors will be valid candidates after
+ // this node is scheduled.
+ uint32_t path_to_node = scheduling_node->GetCriticalPath();
+ for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
+ predecessor->MaybeUpdateCriticalPath(
+ path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
+ predecessor->DecrementNumberOfUnscheduledSuccessors();
+ if (!predecessor->HasUnscheduledSuccessors()) {
+ candidates_.push_back(predecessor);
+ }
+ }
+ for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
+ // Do not update the critical path.
+ // The 'other' (i.e. non-data) dependencies (usually) do not represent a
+ // 'material' dependency of one node on another; they exist for program
+ // correctness. So we do not use them to compute the critical path.
+ predecessor->DecrementNumberOfUnscheduledSuccessors();
+ if (!predecessor->HasUnscheduledSuccessors()) {
+ candidates_.push_back(predecessor);
+ }
+ }
+
+ Schedule(scheduling_node->GetInstruction());
+}
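A small worked example of the data-predecessor update above, with invented numbers: if the node just scheduled has a critical path of 3 and a predecessor has internal latency 1, latency 2, and a current critical path of 4, the predecessor is raised to max(4, 3 + 1 + 2) = 6:

    #include <algorithm>
    #include <cassert>

    int main() {
      // Toy numbers mirroring MaybeUpdateCriticalPath() for one data predecessor.
      unsigned path_to_node = 3;      // critical path of the node just scheduled
      unsigned internal_latency = 1;  // predecessor's internal latency
      unsigned latency = 2;           // predecessor's result latency
      unsigned critical_path = 4;     // predecessor's current critical path
      critical_path = std::max(critical_path, path_to_node + internal_latency + latency);
      assert(critical_path == 6);
      return 0;
    }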
+
+// Move an instruction after the cursor instruction within one basic block.
+static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
+ DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
+ DCHECK(!instruction->IsControlFlow());
+ DCHECK(!cursor->IsControlFlow());
+ instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false);
+}
+
+void HScheduler::Schedule(HInstruction* instruction) {
+ if (instruction == cursor_) {
+ cursor_ = cursor_->GetPrevious();
+ } else {
+ MoveAfterInBlock(instruction, cursor_);
+ }
+}
+
+bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
+ // We want to avoid exhaustively listing all instructions, so we first check
+ // for instruction categories that we know are safe.
+ if (instruction->IsControlFlow() ||
+ instruction->IsConstant()) {
+ return true;
+ }
+ // Currently all unary and binary operations are safe to schedule, so avoid
+ // checking for each of them individually.
+ // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
+ // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
+ // the exhaustive lists here.
+ if (instruction->IsUnaryOperation()) {
+ DCHECK(instruction->IsBooleanNot() ||
+ instruction->IsNot() ||
+ instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
+ return true;
+ }
+ if (instruction->IsBinaryOperation()) {
+ DCHECK(instruction->IsAdd() ||
+ instruction->IsAnd() ||
+ instruction->IsCompare() ||
+ instruction->IsCondition() ||
+ instruction->IsDiv() ||
+ instruction->IsMul() ||
+ instruction->IsOr() ||
+ instruction->IsRem() ||
+ instruction->IsRor() ||
+ instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsSub() ||
+ instruction->IsUShr() ||
+ instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
+ return true;
+ }
+ // The scheduler should not see any of these.
+ DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
+ // List of instructions explicitly excluded:
+ // HClearException
+ // HClinitCheck
+ // HDeoptimize
+ // HLoadClass
+ // HLoadException
+ // HMemoryBarrier
+ // HMonitorOperation
+ // HNativeDebugInfo
+ // HThrow
+ // HTryBoundary
+ // TODO: Some of the instructions above may be safe to schedule (maybe as
+ // scheduling barriers).
+ return instruction->IsArrayGet() ||
+ instruction->IsArraySet() ||
+ instruction->IsArrayLength() ||
+ instruction->IsBoundType() ||
+ instruction->IsBoundsCheck() ||
+ instruction->IsCheckCast() ||
+ instruction->IsClassTableGet() ||
+ instruction->IsCurrentMethod() ||
+ instruction->IsDivZeroCheck() ||
+ instruction->IsInstanceFieldGet() ||
+ instruction->IsInstanceFieldSet() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsInvokeInterface() ||
+ instruction->IsInvokeStaticOrDirect() ||
+ instruction->IsInvokeUnresolved() ||
+ instruction->IsInvokeVirtual() ||
+ instruction->IsLoadString() ||
+ instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsNullCheck() ||
+ instruction->IsPackedSwitch() ||
+ instruction->IsParameterValue() ||
+ instruction->IsPhi() ||
+ instruction->IsReturn() ||
+ instruction->IsReturnVoid() ||
+ instruction->IsSelect() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsStaticFieldSet() ||
+ instruction->IsSuspendCheck() ||
+ instruction->IsTypeConversion() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet();
+}
+
+bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
+ // We may only be interested in loop blocks.
+ if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
+ return false;
+ }
+ if (block->GetTryCatchInformation() != nullptr) {
+ // Do not schedule blocks that are part of try-catch, because the scheduler cannot
+ // see whether the catch block relies on the instruction order of the try block.
+ // In the following example, if we enabled the scheduler for the try block,
+ // MultiplyAccumulate could be scheduled before DivZeroCheck,
+ // which can result in an incorrect value in the catch block.
+ // try {
+ // a = a/b; // DivZeroCheck
+ // // Div
+ // c = c*d+e; // MultiplyAccumulate
+ // } catch {System.out.print(c); }
+ return false;
+ }
+ // Check whether all instructions in this block are schedulable.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!IsSchedulable(it.Current())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
+ return instr->IsControlFlow() ||
+ // Don't break the calling convention.
+ instr->IsParameterValue() ||
+ // Code generation of goto relies on SuspendCheck's position.
+ instr->IsSuspendCheck();
+}
+
+void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
+ bool schedule_randomly) {
+ // Avoid compilation error when compiling for unsupported instruction set.
+ UNUSED(only_optimize_loop_blocks);
+ UNUSED(schedule_randomly);
+ switch (instruction_set_) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+ case kArm64: {
+ // Phase-local allocator that allocates scheduler-internal data structures like
+ // scheduling nodes, the internal node map, dependencies, etc.
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ RandomSchedulingNodeSelector random_selector;
+ SchedulingNodeSelector* selector = schedule_randomly
+ ? static_cast<SchedulingNodeSelector*>(&random_selector)
+ : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+
+ arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
+ scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+ scheduler.Schedule(graph_);
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
new file mode 100644
index 0000000000..ab0dad4300
--- /dev/null
+++ b/compiler/optimizing/scheduler.h
@@ -0,0 +1,487 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+
+#include <fstream>
+
+#include "base/time_utils.h"
+#include "driver/compiler_driver.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// General description of instruction scheduling.
+//
+// This pass tries to improve the quality of the generated code by reordering
+// instructions in the graph to avoid execution delays caused by execution
+// dependencies.
+// Currently, scheduling is performed at the block level, so no `HInstruction`
+// ever leaves its block in this pass.
+//
+// The scheduling process iterates through blocks in the graph. For blocks that
+// we can and want to schedule:
+// 1) Build a dependency graph for instructions.
+// It includes data dependencies (inputs/uses), but also environment
+// dependencies and side-effect dependencies.
+// 2) Schedule the dependency graph.
+// This is a topological sort of the dependency graph, using heuristics to
+// decide which node to schedule first when there are multiple candidates.
+//
+// A few factors impacting the quality of the scheduling are:
+// - The heuristics used to decide what node to schedule in the topological sort
+// when there are multiple valid candidates. There is a wide range of
+// complexity possible here, going from a simple model only considering
+// latencies, to a super detailed CPU pipeline model.
+// - Fewer dependencies in the dependency graph give more freedom to the
+// scheduling heuristics. For example, de-aliasing can open up opportunities
+// for reordering memory accesses.
+// - The level of abstraction of the IR. It is easier to evaluate scheduling for
+// IRs that translate to a single assembly instruction than for IRs
+// that generate multiple assembly instructions or generate different code
+// depending on properties of the IR.
+// - Scheduling is performed before register allocation, so it is not aware of
+// the impact of moving instructions on register allocation.
+//
+//
+// The scheduling code uses the terms predecessors, successors, and dependencies.
+// This can be confusing at times, so here are clarifications.
+// These terms are used from the point of view of the program dependency graph: the
+// inputs of an instruction are part of its dependencies, and hence part of its
+// predecessors, while the uses of an instruction are (part of) its successors.
+// (Side-effect dependencies can yield predecessors or successors that are not
+// inputs or uses.)
+//
+// Here is a trivial example. For the Java code:
+//
+// int a = 1 + 2;
+//
+// we would have the instructions
+//
+// i1 HIntConstant 1
+// i2 HIntConstant 2
+// i3 HAdd [i1,i2]
+//
+// `i1` and `i2` are predecessors of `i3`.
+// `i3` is a successor of `i1` and a successor of `i2`.
+// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
+// and `n3` (respectively for instructions `i1`, `i2`, and `i3`).
+// Conceptually the program dependency graph for this would contain two edges
+//
+// n1 -> n3
+// n2 -> n3
+//
+// Since we schedule backwards (starting from the last instruction in each basic
+// block), each node keeps a list of pointers to its predecessors. So `n3` would
+// keep pointers to its predecessors `n1` and `n2`.
+//
+// Node dependencies are also referred to from the program dependency graph
+// point of view: we say that node `B` immediately depends on `A` if there is an
+// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
+// `B`, `B` is a successor of `A`. In the example above `n3` depends on `n1` and
+// `n2`.
+// Since nodes in the scheduling graph keep a list of their predecessors, node
+// `B` will have a pointer to its predecessor `A`.
+// As we schedule backwards, `B` will be selected for scheduling before `A` is.
+//
+// So the scheduling for the example above could happen as follows.
+//
+// |---------------------------+------------------------|
+// | candidates for scheduling | instructions scheduled |
+// | --------------------------+------------------------|
+//
+// The only node without successors is `n3`, so it is the only initial
+// candidate.
+//
+// | n3 | (none) |
+//
+// We schedule `n3` as the last (and only) instruction. All of its predecessors
+// that no longer have unscheduled successors become candidates; that is, `n1`
+// and `n2`.
+//
+// | n1, n2 | n3 |
+//
+// One of the candidates is selected. In practice this is where scheduling
+// heuristics kick in, to decide which of the candidates should be selected.
+// In this example, let it be `n1`. It is scheduled before previously scheduled
+// nodes (in program order). There are no other nodes to add to the list of
+// candidates.
+//
+// | n2 | n1 |
+// | | n3 |
+//
+// The only candidate available for scheduling is `n2`. Schedule it before
+// (in program order) the previously scheduled nodes.
+//
+// | (none) | n2 |
+// | | n1 |
+// | | n3 |
+// |---------------------------+------------------------|
+//
+// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
+// In this trivial example, it does not matter which of `i1` and `i2` is
+// scheduled first since they are constants. However the same process would
+// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).
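+//
+// A rough sketch of this selection loop, expressed in terms of the interfaces
+// declared below:
+//
+//   while (!candidates.empty()) {
+//     SchedulingNode* node = selector->PopHighestPriorityNode(&candidates, graph);
+//     // Insert `node`'s instruction before the previously scheduled ones, then
+//     // add its predecessors that have no unscheduled successors left to
+//     // `candidates`.
+//   }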
+
+// Set to true to have instruction scheduling dump scheduling graphs to the file
+// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
+static constexpr bool kDumpDotSchedulingGraphs = false;
+
+// Typically used as a default instruction latency.
+static constexpr uint32_t kGenericInstructionLatency = 1;
+
+class HScheduler;
+
+/**
+ * A node representing an `HInstruction` in the `SchedulingGraph`.
+ */
+class SchedulingNode : public ArenaObject<kArenaAllocScheduler> {
+ public:
+ SchedulingNode(HInstruction* instr, ArenaAllocator* arena, bool is_scheduling_barrier)
+ : latency_(0),
+ internal_latency_(0),
+ critical_path_(0),
+ instruction_(instr),
+ is_scheduling_barrier_(is_scheduling_barrier),
+ data_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+ other_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+ num_unscheduled_successors_(0) {
+ data_predecessors_.reserve(kPreallocatedPredecessors);
+ }
+
+ void AddDataPredecessor(SchedulingNode* predecessor) {
+ data_predecessors_.push_back(predecessor);
+ predecessor->num_unscheduled_successors_++;
+ }
+
+ void AddOtherPredecessor(SchedulingNode* predecessor) {
+ other_predecessors_.push_back(predecessor);
+ predecessor->num_unscheduled_successors_++;
+ }
+
+ void DecrementNumberOfUnscheduledSuccessors() {
+ num_unscheduled_successors_--;
+ }
+
+ void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
+ critical_path_ = std::max(critical_path_, other_critical_path);
+ }
+
+ bool HasUnscheduledSuccessors() const {
+ return num_unscheduled_successors_ != 0;
+ }
+
+ HInstruction* GetInstruction() const { return instruction_; }
+ uint32_t GetLatency() const { return latency_; }
+ void SetLatency(uint32_t latency) { latency_ = latency; }
+ uint32_t GetInternalLatency() const { return internal_latency_; }
+ void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
+ uint32_t GetCriticalPath() const { return critical_path_; }
+ bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }
+ const ArenaVector<SchedulingNode*>& GetDataPredecessors() const { return data_predecessors_; }
+ const ArenaVector<SchedulingNode*>& GetOtherPredecessors() const { return other_predecessors_; }
+
+ private:
+  // The latency of this node. It represents the latency from the moment the
+ // last instruction for this node has executed to the moment the result
+ // produced by this node is available to users.
+ uint32_t latency_;
+ // This represents the time spent *within* the generated code for this node.
+ // It should be zero for nodes that only generate a single instruction.
+ uint32_t internal_latency_;
+
+ // The critical path from this instruction to the end of scheduling. It is
+ // used by the scheduling heuristics to measure the priority of this instruction.
+ // It is defined as
+ // critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
+ // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
+ // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
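+  // For example, a node with latency_ 2 whose two uses have (internal_latency_,
+  // critical_path_) pairs of (0, 3) and (1, 5) gets
+  // critical_path_ = 2 + max(0 + 3, 1 + 5) = 8.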
+ uint32_t critical_path_;
+
+ // The instruction that this node represents.
+ HInstruction* const instruction_;
+
+  // If a node is a scheduling barrier, other nodes cannot be scheduled before it.
+ const bool is_scheduling_barrier_;
+
+ // The lists of predecessors. They cannot be scheduled before this node. Once
+ // this node is scheduled, we check whether any of its predecessors has become a
+ // valid candidate for scheduling.
+ // Predecessors in `data_predecessors_` are data dependencies. Those in
+ // `other_predecessors_` contain side-effect dependencies, environment
+ // dependencies, and scheduling barrier dependencies.
+ ArenaVector<SchedulingNode*> data_predecessors_;
+ ArenaVector<SchedulingNode*> other_predecessors_;
+
+ // The number of unscheduled successors for this node. This number is
+ // decremented as successors are scheduled. When it reaches zero this node
+ // becomes a valid candidate to schedule.
+ uint32_t num_unscheduled_successors_;
+
+ static constexpr size_t kPreallocatedPredecessors = 4;
+};
+
+/*
+ * Directed acyclic graph for scheduling.
+ */
+class SchedulingGraph : public ValueObject {
+ public:
+ SchedulingGraph(const HScheduler* scheduler, ArenaAllocator* arena)
+ : scheduler_(scheduler),
+ arena_(arena),
+ contains_scheduling_barrier_(false),
+ nodes_map_(arena_->Adapter(kArenaAllocScheduler)) {}
+
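+  // Note: as illustrated in scheduler_test.cc, instructions are expected to be
+  // added in reverse program order, so that the uses of `instr` are already in
+  // the graph when `instr` is recorded as their predecessor.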
+ SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
+ SchedulingNode* node = new (arena_) SchedulingNode(instr, arena_, is_scheduling_barrier);
+ nodes_map_.Insert(std::make_pair(instr, node));
+ contains_scheduling_barrier_ |= is_scheduling_barrier;
+ AddDependencies(instr, is_scheduling_barrier);
+ return node;
+ }
+
+ void Clear() {
+ nodes_map_.Clear();
+ contains_scheduling_barrier_ = false;
+ }
+
+ SchedulingNode* GetNode(const HInstruction* instr) const {
+ auto it = nodes_map_.Find(instr);
+ if (it == nodes_map_.end()) {
+ return nullptr;
+ } else {
+ return it->second;
+ }
+ }
+
+ bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+ bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+ bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const;
+ bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+ bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
+
+ size_t Size() const {
+ return nodes_map_.Size();
+ }
+
+ // Dump the scheduling graph, in dot file format, appending it to the file
+ // `scheduling_graphs.dot`.
+ void DumpAsDotGraph(const std::string& description,
+ const ArenaVector<SchedulingNode*>& initial_candidates);
+
+ protected:
+ void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
+ void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
+ AddDependency(node, dependency, /*is_data_dependency*/true);
+ }
+ void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
+ AddDependency(node, dependency, /*is_data_dependency*/false);
+ }
+
+  // Add dependency edges for the given `HInstruction`: inputs, environments, and side-effects.
+ void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false);
+
+ const HScheduler* const scheduler_;
+
+ ArenaAllocator* const arena_;
+
+ bool contains_scheduling_barrier_;
+
+ ArenaHashMap<const HInstruction*, SchedulingNode*> nodes_map_;
+};
+
+/*
+ * The visitors derived from this base class are used by schedulers to evaluate
+ * the latencies of `HInstruction`s.
+ */
+class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
+ public:
+ // This class and its sub-classes will never be used to drive a visit of an
+ // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
+ // to pass a valid graph to `HGraphDelegateVisitor()`.
+ SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+
+ void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
+ "Architecture-specific scheduling latency visitors must handle all instructions"
+               " (potentially by overriding the generic `VisitInstruction()`).";
+ UNREACHABLE();
+ }
+
+ void Visit(HInstruction* instruction) {
+ instruction->Accept(this);
+ }
+
+ void CalculateLatency(SchedulingNode* node) {
+ // By default nodes have no internal latency.
+ last_visited_internal_latency_ = 0;
+ Visit(node->GetInstruction());
+ }
+
+ uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
+ uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }
+
+ protected:
+  // The latency of the most recently visited SchedulingNode.
+ // This is for reporting the latency value to the user of this visitor.
+ uint32_t last_visited_latency_;
+  // This represents the time spent *within* the generated code for the most recently visited
+ // SchedulingNode. This is for reporting the internal latency value to the user of this visitor.
+ uint32_t last_visited_internal_latency_;
+};
+
+class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
+ public:
+ virtual SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) = 0;
+ virtual ~SchedulingNodeSelector() {}
+ protected:
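+  // Remove the node at `index` in O(1) by overwriting it with the last element
+  // and popping the back; the order of the candidate list does not matter.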
+ static void DeleteNodeAtIndex(ArenaVector<SchedulingNode*>* nodes, size_t index) {
+ (*nodes)[index] = nodes->back();
+ nodes->pop_back();
+ }
+};
+
+/*
+ * Select a `SchedulingNode` at random within the candidates.
+ */
+class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+  RandomSchedulingNodeSelector() : seed_(static_cast<uint32_t>(NanoTime())) {
+    srand(seed_);
+  }
+
+ SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) OVERRIDE {
+ UNUSED(graph);
+ DCHECK(!nodes->empty());
+ size_t select = rand_r(&seed_) % nodes->size();
+ SchedulingNode* select_node = (*nodes)[select];
+ DeleteNodeAtIndex(nodes, select);
+ return select_node;
+ }
+
+ uint32_t seed_;
+};
+
+/*
+ * Select a `SchedulingNode` according to critical path information,
+ * with heuristics to favor certain instruction patterns like materialized condition.
+ */
+class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+ CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+
+ SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) OVERRIDE;
+
+ protected:
+ SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
+ SchedulingNode* check) const;
+
+ SchedulingNode* SelectMaterializedCondition(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) const;
+
+ private:
+ const SchedulingNode* prev_select_;
+};
+
+class HScheduler {
+ public:
+ HScheduler(ArenaAllocator* arena,
+ SchedulingLatencyVisitor* latency_visitor,
+ SchedulingNodeSelector* selector)
+ : arena_(arena),
+ latency_visitor_(latency_visitor),
+ selector_(selector),
+ only_optimize_loop_blocks_(true),
+ scheduling_graph_(this, arena),
+ candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
+ virtual ~HScheduler() {}
+
+ void Schedule(HGraph* graph);
+
+ void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }
+
+  // Instructions cannot be rescheduled across a scheduling barrier.
+ virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+ protected:
+ void Schedule(HBasicBlock* block);
+ void Schedule(SchedulingNode* scheduling_node);
+ void Schedule(HInstruction* instruction);
+
+ // Any instruction returning `false` via this method will prevent its
+ // containing basic block from being scheduled.
+ // This method is used to restrict scheduling to instructions that we know are
+ // safe to handle.
+ virtual bool IsSchedulable(const HInstruction* instruction) const;
+ bool IsSchedulable(const HBasicBlock* block) const;
+
+ void CalculateLatency(SchedulingNode* node) {
+ latency_visitor_->CalculateLatency(node);
+ node->SetLatency(latency_visitor_->GetLastVisitedLatency());
+ node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
+ }
+
+ ArenaAllocator* const arena_;
+ SchedulingLatencyVisitor* const latency_visitor_;
+ SchedulingNodeSelector* const selector_;
+ bool only_optimize_loop_blocks_;
+
+  // We instantiate the members below as part of this class to avoid
+  // instantiating them locally for every basic block we schedule.
+ SchedulingGraph scheduling_graph_;
+ // A pointer indicating where the next instruction to be scheduled will be inserted.
+ HInstruction* cursor_;
+ // The list of candidates for scheduling. A node becomes a candidate when all
+ // its predecessors have been scheduled.
+ ArenaVector<SchedulingNode*> candidates_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HScheduler);
+};
+
+inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const {
+ return scheduler_->IsSchedulingBarrier(instruction);
+}
+
+class HInstructionScheduling : public HOptimization {
+ public:
+ HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+ : HOptimization(graph, kInstructionScheduling),
+ instruction_set_(instruction_set) {}
+
+ void Run() {
+ Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
+ }
+ void Run(bool only_optimize_loop_blocks, bool schedule_randomly);
+
+ static constexpr const char* kInstructionScheduling = "scheduler";
+
+ const InstructionSet instruction_set_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_H_
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
new file mode 100644
index 0000000000..e3701fbcb1
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scheduler_arm64.h"
+#include "code_generator_utils.h"
+
+namespace art {
+namespace arm64 {
+
+void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
+ last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType())
+ ? kArm64FloatingPointOpLatency
+ : kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight(
+ HBitwiseNegatedRight* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp(
+ HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddress(
+ HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ // Although the code generated is a simple `add` instruction, we found through empirical results
+ // that spacing it from its use in memory accesses was beneficial.
+ last_visited_latency_ = kArm64IntegerOpLatency + 2;
+}
+
+void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) {
+ if (!instruction->GetArray()->IsIntermediateAddress()) {
+ // Take the intermediate address computation into account.
+ last_visited_internal_latency_ = kArm64IntegerOpLatency;
+ }
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryStoreLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64IntegerOpLatency;
+  // Users do not depend on a data result from this instruction.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) {
+ Primitive::Type type = instr->GetResultType();
+ switch (type) {
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArm64DivFloatLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArm64DivDoubleLatency;
+ break;
+ default:
+ // Follow the code path used by code generation.
+ if (instr->GetRight()->IsConstant()) {
+ int64_t imm = Int64FromConstant(instr->GetRight()->AsConstant());
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ } else {
+ last_visited_latency_ = kArm64DivIntegerLatency;
+ }
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64LoadStringInternalLatency;
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
+ last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType())
+ ? kArm64MulFloatingPointLatency
+ : kArm64MulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitNewInstance(HNewInstance* instruction) {
+ if (instruction->IsStringAlloc()) {
+ last_visited_internal_latency_ = 2 + kArm64MemoryLoadLatency + kArm64CallInternalLatency;
+ } else {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ }
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) {
+ if (Primitive::IsFloatingPointType(instruction->GetResultType())) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+ } else {
+ // Follow the code path used by code generation.
+ if (instruction->GetRight()->IsConstant()) {
+ int64_t imm = Int64FromConstant(instruction->GetRight()->AsConstant());
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ } else {
+ last_visited_internal_latency_ = kArm64DivIntegerLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ DCHECK((block->GetLoopInformation() != nullptr) ||
+ (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+  // Users do not depend on a data result from this instruction.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) {
+ if (Primitive::IsFloatingPointType(instr->GetResultType()) ||
+ Primitive::IsFloatingPointType(instr->GetInputType())) {
+ last_visited_latency_ = kArm64TypeConversionFloatingPointIntegerLatency;
+ } else {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+}
+
+} // namespace arm64
+} // namespace art
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
new file mode 100644
index 0000000000..702027c535
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+
+#include "scheduler.h"
+
+namespace art {
+namespace arm64 {
+
+static constexpr uint32_t kArm64MemoryLoadLatency = 5;
+static constexpr uint32_t kArm64MemoryStoreLatency = 3;
+
+static constexpr uint32_t kArm64CallInternalLatency = 10;
+static constexpr uint32_t kArm64CallLatency = 5;
+
+// AArch64 instruction latencies.
+// We currently assume that all arm64 CPUs share the same instruction latency list.
+static constexpr uint32_t kArm64IntegerOpLatency = 2;
+static constexpr uint32_t kArm64FloatingPointOpLatency = 5;
+
+static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
+static constexpr uint32_t kArm64DivDoubleLatency = 30;
+static constexpr uint32_t kArm64DivFloatLatency = 15;
+static constexpr uint32_t kArm64DivIntegerLatency = 5;
+static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
+static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
+static constexpr uint32_t kArm64MulIntegerLatency = 6;
+static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+
+class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
+ public:
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(BinaryOperation , unused) \
+ M(BoundsCheck , unused) \
+ M(Div , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(Mul , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) OVERRIDE;
+
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
+class HSchedulerARM64 : public HScheduler {
+ public:
+ HSchedulerARM64(ArenaAllocator* arena, SchedulingNodeSelector* selector)
+ : HScheduler(arena, &arm64_latency_visitor_, selector) {}
+ ~HSchedulerARM64() OVERRIDE {}
+
+ bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+ HInstruction::InstructionKind::k##type:
+ switch (instruction->GetKind()) {
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
+ return true;
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+#undef CASE_INSTRUCTION_KIND
+ }
+
+ private:
+ SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
+ DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
+};
+
+} // namespace arm64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
new file mode 100644
index 0000000000..31d13e2a26
--- /dev/null
+++ b/compiler/optimizing/scheduler_test.cc
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "codegen_test_utils.h"
+#include "common_compiler_test.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "pc_relative_fixups_x86.h"
+#include "register_allocator.h"
+#include "scheduler.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "scheduler_arm64.h"
+#endif
+
+namespace art {
+
+// Return all the combinations of ISA and code generator that are executable on
+// hardware or on a simulator, and that we would like to test.
+static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
+ ::std::vector<CodegenTargetConfig> v;
+ ::std::vector<CodegenTargetConfig> test_config_candidates = {
+#ifdef ART_ENABLE_CODEGEN_arm
+ CodegenTargetConfig(kArm, create_codegen_arm),
+ CodegenTargetConfig(kThumb2, create_codegen_arm),
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ CodegenTargetConfig(kArm64, create_codegen_arm64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ CodegenTargetConfig(kX86, create_codegen_x86),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ CodegenTargetConfig(kX86_64, create_codegen_x86_64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ CodegenTargetConfig(kMips, create_codegen_mips),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ CodegenTargetConfig(kMips64, create_codegen_mips64)
+#endif
+ };
+
+ for (auto test_config : test_config_candidates) {
+ if (CanExecute(test_config.GetInstructionSet())) {
+ v.push_back(test_config);
+ }
+ }
+
+ return v;
+}
+
+class SchedulerTest : public CommonCompilerTest {};
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+TEST_F(SchedulerTest, DependencyGraph) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->AddBlock(block1);
+ graph->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // block1:
+ // add1 Add [c1, c2]
+ // add2 Add [add1, c2]
+ // mul Mul [add1, add2]
+ // div_check DivZeroCheck [add2] (env: add2, mul)
+ // div Div [add1, div_check]
+ // array_get1 ArrayGet [array, add1]
+ // array_set1 ArraySet [array, add1, add2]
+ // array_get2 ArrayGet [array, add1]
+ // array_set2 ArraySet [array, add1, add2]
+
+ HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstruction* c1 = graph->GetIntConstant(1);
+ HInstruction* c2 = graph->GetIntConstant(10);
+ HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
+ HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
+ HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
+ HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
+ HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+ HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+
+ DCHECK(div_check->CanThrow());
+
+ entry->AddInstruction(array);
+
+ HInstruction* block_instructions[] = {add1,
+ add2,
+ mul,
+ div_check,
+ div,
+ array_get1,
+ array_set1,
+ array_get2,
+ array_set2};
+ for (auto instr : block_instructions) {
+ block1->AddInstruction(instr);
+ }
+
+ HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
+ 2,
+ graph->GetArtMethod(),
+ 0,
+ div_check);
+ div_check->SetRawEnvironment(environment);
+ environment->SetRawEnvAt(0, add2);
+ add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+ environment->SetRawEnvAt(1, mul);
+ mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+ ArenaAllocator* arena = graph->GetArena();
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
+ SchedulingGraph scheduling_graph(&scheduler, arena);
+ // Instructions must be inserted in reverse order into the scheduling graph.
+ for (auto instr : ReverseRange(block_instructions)) {
+ scheduling_graph.AddNode(instr);
+ }
+
+  // Dependencies should not cross basic blocks.
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+ // Define-use dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+  // Read and write dependencies.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+ // Env dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+ // CanThrow.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+}
+#endif
+
+static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
+ bool has_result,
+ int expected) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateCFG(&arena, data);
+
+ // Schedule the graph randomly.
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+ scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+ RunCode(target_config,
+ graph,
+ [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+ has_result, expected);
+ }
+}
+
+TEST_F(SchedulerTest, RandomScheduling) {
+ //
+  // Java source: code crafted to check that (random) scheduling produces the correct result.
+ //
+ // int result = 0;
+ // float fr = 10.0f;
+ // for (int i = 1; i < 10; i++) {
+ // fr ++;
+ // int t1 = result >> i;
+ // int t2 = result * i;
+ // result = result + t1 - t2;
+ // fr = fr / i;
+ // result += (int)fr;
+ // }
+ // return result;
+ //
+ const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+ Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0
+ Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000
+ Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1
+ Instruction::CONST_16 | 5 << 8, 0x000a, // const/16 v5, #int 10
+ Instruction::IF_GE | 5 << 12 | 1 << 8, 0x0014, // if-ge v1, v5, 001a // +0014
+ Instruction::CONST_HIGH16 | 5 << 8, 0x3f80, // const/high16 v5, #float 1.0 // #3f800000
+ Instruction::ADD_FLOAT_2ADDR | 5 << 12 | 0 << 8, // add-float/2addr v0, v5
+ Instruction::SHR_INT | 3 << 8, 1 << 8 | 2 , // shr-int v3, v2, v1
+ Instruction::MUL_INT | 4 << 8, 1 << 8 | 2, // mul-int v4, v2, v1
+ Instruction::ADD_INT | 5 << 8, 3 << 8 | 2, // add-int v5, v2, v3
+ Instruction::SUB_INT | 2 << 8, 4 << 8 | 5, // sub-int v2, v5, v4
+ Instruction::INT_TO_FLOAT | 1 << 12 | 5 << 8, // int-to-float v5, v1
+ Instruction::DIV_FLOAT_2ADDR | 5 << 12 | 0 << 8, // div-float/2addr v0, v5
+ Instruction::FLOAT_TO_INT | 0 << 12 | 5 << 8, // float-to-int v5, v0
+ Instruction::ADD_INT_2ADDR | 5 << 12 | 2 << 8, // add-int/2addr v2, v5
+ Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1, // add-int/lit8 v1, v1, #int 1 // #01
+ Instruction::GOTO | 0xeb << 8, // goto 0004 // -0015
+ Instruction::RETURN | 2 << 8); // return v2
+
+ constexpr int kNumberOfRuns = 10;
+ for (int i = 0; i < kNumberOfRuns; ++i) {
+ CompileWithRandomSchedulerAndRun(data, true, 138774);
+ }
+}
+
+} // namespace art