diff options
author | 2017-05-08 18:36:40 +0100 | |
---|---|---|
committer | 2017-06-30 16:32:54 +0100 | |
commit | 2a3471fc83383bfe3e060799482e372420ba6150 (patch) | |
tree | 7b7764521a0b67392e023f1efacc0dbae64fe675 /compiler/optimizing | |
parent | 89ae0f42e38a2f985ac404830f2a05fecf9547e2 (diff) |
Disambiguate memory accesses in instruction scheduling
Based on aliasing information from the heap location collector,
instruction scheduling can further eliminate side-effect
dependencies between memory accesses to different locations,
and perform better scheduling of memory loads and stores.
Performance improvements of this CL, measured on Cortex-A53:
| benchmarks | ARM64 backend | ARM backend |
|----------------+---------------|-------------|
| algorithm | 0.1 % | 0.1 % |
| benchmarksgame | 0.5 % | 1.3 % |
| caffeinemark | 0.0 % | 0.0 % |
| math | 5.1 % | 5.0 % |
| stanford | 1.1 % | 0.6 % |
| testsimd | 0.4 % | 0.1 % |
Compilation time impact is negligible, because this
heap-location load-store analysis is performed only
on loop basic blocks that get instruction scheduled.
Test: m test-art-host
Test: m test-art-target
Test: 706-checker-scheduler
Change-Id: I43d7003c09bfab9d3a1814715df666aea9a7360b
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/scheduler.cc | 213 | ||||
-rw-r--r-- | compiler/optimizing/scheduler.h | 20 | ||||
-rw-r--r-- | compiler/optimizing/scheduler_test.cc | 157 |
3 files changed, 378 insertions, 12 deletions
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 147fa1c05a..5ad011d8f9 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -66,28 +66,215 @@ static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) { return false; } +size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const { + DCHECK(heap_location_collector_ != nullptr); + size_t heap_loc = heap_location_collector_->GetArrayAccessHeapLocation(array, index); + // This array access should be analyzed and added to HeapLocationCollector before. + DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); + return heap_loc; +} -// Check whether `node` depends on `other`, taking into account `SideEffect` -// information and `CanThrow` information. -static bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) { - if (MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) { +bool SchedulingGraph::ArrayAccessMayAlias(const HInstruction* node, + const HInstruction* other) const { + DCHECK(heap_location_collector_ != nullptr); + size_t node_heap_loc = ArrayAccessHeapLocation(node->InputAt(0), node->InputAt(1)); + size_t other_heap_loc = ArrayAccessHeapLocation(other->InputAt(0), other->InputAt(1)); + + // For example: arr[0] and arr[0] + if (node_heap_loc == other_heap_loc) { return true; } - if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) { + // For example: arr[0] and arr[i] + if (heap_location_collector_->MayAlias(node_heap_loc, other_heap_loc)) { return true; } - if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) { + return false; +} + +static bool IsArrayAccess(const HInstruction* instruction) { + return instruction->IsArrayGet() || instruction->IsArraySet(); +} + +static bool IsInstanceFieldAccess(const HInstruction* instruction) { + return instruction->IsInstanceFieldGet() || + 
instruction->IsInstanceFieldSet() || + instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet(); +} + +static bool IsStaticFieldAccess(const HInstruction* instruction) { + return instruction->IsStaticFieldGet() || + instruction->IsStaticFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet(); +} + +static bool IsResolvedFieldAccess(const HInstruction* instruction) { + return instruction->IsInstanceFieldGet() || + instruction->IsInstanceFieldSet() || + instruction->IsStaticFieldGet() || + instruction->IsStaticFieldSet(); +} + +static bool IsUnresolvedFieldAccess(const HInstruction* instruction) { + return instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet(); +} + +static bool IsFieldAccess(const HInstruction* instruction) { + return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction); +} + +static const FieldInfo* GetFieldInfo(const HInstruction* instruction) { + if (instruction->IsInstanceFieldGet()) { + return &instruction->AsInstanceFieldGet()->GetFieldInfo(); + } else if (instruction->IsInstanceFieldSet()) { + return &instruction->AsInstanceFieldSet()->GetFieldInfo(); + } else if (instruction->IsStaticFieldGet()) { + return &instruction->AsStaticFieldGet()->GetFieldInfo(); + } else if (instruction->IsStaticFieldSet()) { + return &instruction->AsStaticFieldSet()->GetFieldInfo(); + } else { + LOG(FATAL) << "Unexpected field access type"; + UNREACHABLE(); + } +} + +size_t SchedulingGraph::FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const { + DCHECK(obj != nullptr); + DCHECK(field != nullptr); + DCHECK(heap_location_collector_ != nullptr); + + size_t heap_loc = heap_location_collector_->FindHeapLocationIndex( + heap_location_collector_->FindReferenceInfoOf( + 
heap_location_collector_->HuntForOriginalReference(obj)), + field->GetFieldOffset().SizeValue(), + nullptr, + field->GetDeclaringClassDefIndex()); + // This field access should be analyzed and added to HeapLocationCollector before. + DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); + + return heap_loc; +} + +bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node, + const HInstruction* other) const { + DCHECK(heap_location_collector_ != nullptr); + + // Static and instance field accesses should not alias. + if ((IsInstanceFieldAccess(node) && IsStaticFieldAccess(other)) || + (IsStaticFieldAccess(node) && IsInstanceFieldAccess(other))) { + return false; + } + + // If either of the field accesses is unresolved. + if (IsUnresolvedFieldAccess(node) || IsUnresolvedFieldAccess(other)) { + // Conservatively treat these two accesses may alias. + return true; + } + + // If both fields accesses are resolved. + const FieldInfo* node_field = GetFieldInfo(node); + const FieldInfo* other_field = GetFieldInfo(other); + + size_t node_loc = FieldAccessHeapLocation(node->InputAt(0), node_field); + size_t other_loc = FieldAccessHeapLocation(other->InputAt(0), other_field); + + if (node_loc == other_loc) { return true; } + if (!heap_location_collector_->MayAlias(node_loc, other_loc)) { + return false; + } + + return true; +} + +bool SchedulingGraph::HasMemoryDependency(const HInstruction* node, + const HInstruction* other) const { + if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) { + return false; + } + + if (heap_location_collector_ == nullptr || + heap_location_collector_->GetNumberOfHeapLocations() == 0) { + // Without HeapLocation information from load store analysis, + // we cannot do further disambiguation analysis on these two instructions. + // Just simply say that those two instructions have memory dependency. 
+ return true; + } + + if (IsArrayAccess(node) && IsArrayAccess(other)) { + return ArrayAccessMayAlias(node, other); + } + if (IsFieldAccess(node) && IsFieldAccess(other)) { + return FieldAccessMayAlias(node, other); + } + + // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess + if (node->IsVecMemoryOperation() && other->IsVecMemoryOperation()) { + return true; + } + if (node->IsVecMemoryOperation() && IsArrayAccess(other)) { + return true; + } + if (IsArrayAccess(node) && other->IsVecMemoryOperation()) { + return true; + } + + // Heap accesses of different kinds should not alias. + if (IsArrayAccess(node) && IsFieldAccess(other)) { + return false; + } + if (IsFieldAccess(node) && IsArrayAccess(other)) { + return false; + } + if (node->IsVecMemoryOperation() && IsFieldAccess(other)) { + return false; + } + if (IsFieldAccess(node) && other->IsVecMemoryOperation()) { + return false; + } + + // We conservatively treat all other cases having dependency, + // for example, Invoke and ArrayGet. + return true; +} + +bool SchedulingGraph::HasExceptionDependency(const HInstruction* node, + const HInstruction* other) const { + if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) { + return true; + } + if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) { + return true; + } if (other->CanThrow() && node->CanThrow()) { return true; } - // Check side-effect dependency between ArrayGet and BoundsCheck. - if (node->IsArrayGet() && other->IsBoundsCheck() && node->InputAt(1) == other) { + // Above checks should cover all cases where we cannot reorder two + // instructions which may throw exception. + return false; +} + +// Check whether `node` depends on `other`, taking into account `SideEffect` +// information and `CanThrow` information. 
+bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node, + const HInstruction* other) const { + if (HasMemoryDependency(node, other)) { + return true; + } + + // Even if above memory dependency check has passed, it is still necessary to + // check dependencies between instructions that can throw and instructions + // that write to memory. + if (HasExceptionDependency(node, other)) { return true; } @@ -379,6 +566,14 @@ void HScheduler::Schedule(HBasicBlock* block) { // Build the scheduling graph. scheduling_graph_.Clear(); + + // Only perform LSA/HeapLocation analysis on the basic block that + // is going to get instruction scheduled. + HeapLocationCollector heap_location_collector(block->GetGraph()); + heap_location_collector.VisitBasicBlock(block); + heap_location_collector.BuildAliasingMatrix(); + scheduling_graph_.SetHeapLocationCollector(heap_location_collector); + for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) @@ -606,7 +801,9 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, // Avoid compilation error when compiling for unsupported instruction set. 
UNUSED(only_optimize_loop_blocks); UNUSED(schedule_randomly); + UNUSED(codegen_); #endif + switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index 73e8087cd0..930a2c82cf 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -21,6 +21,7 @@ #include "base/time_utils.h" #include "driver/compiler_driver.h" +#include "load_store_analysis.h" #include "nodes.h" #include "optimization.h" #include "code_generator.h" @@ -246,7 +247,8 @@ class SchedulingGraph : public ValueObject { : scheduler_(scheduler), arena_(arena), contains_scheduling_barrier_(false), - nodes_map_(arena_->Adapter(kArenaAllocScheduler)) {} + nodes_map_(arena_->Adapter(kArenaAllocScheduler)), + heap_location_collector_(nullptr) {} SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { SchedulingNode* node = new (arena_) SchedulingNode(instr, arena_, is_scheduling_barrier); @@ -261,6 +263,10 @@ class SchedulingGraph : public ValueObject { contains_scheduling_barrier_ = false; } + void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) { + heap_location_collector_ = &heap_location_collector; + } + SchedulingNode* GetNode(const HInstruction* instr) const { auto it = nodes_map_.Find(instr); if (it == nodes_map_.end()) { @@ -294,6 +300,13 @@ class SchedulingGraph : public ValueObject { void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) { AddDependency(node, dependency, /*is_data_dependency*/false); } + bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const; + bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const; + bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const; + bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const; + bool FieldAccessMayAlias(const HInstruction* 
node, const HInstruction* other) const; + size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const; + size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const; // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects. void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false); @@ -305,6 +318,8 @@ class SchedulingGraph : public ValueObject { bool contains_scheduling_barrier_; ArenaHashMap<const HInstruction*, SchedulingNode*> nodes_map_; + + const HeapLocationCollector* heap_location_collector_; }; /* @@ -482,10 +497,9 @@ class HInstructionScheduling : public HOptimization { static constexpr const char* kInstructionScheduling = "scheduler"; + private: CodeGenerator* const codegen_; const InstructionSet instruction_set_; - - private: DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling); }; diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index d87600aa5e..cc7222dd45 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -18,6 +18,7 @@ #include "builder.h" #include "codegen_test_utils.h" #include "common_compiler_test.h" +#include "load_store_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pc_relative_fixups_x86.h" @@ -193,6 +194,147 @@ class SchedulerTest : public CommonCompilerTest { } } + void TestDependencyGraphOnAliasingArrayAccesses(HScheduler* scheduler) { + HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + graph_->BuildDominatorTree(); + + HInstruction* arr = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + Primitive::kPrimNot); + HInstruction* i = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 1, + Primitive::kPrimInt); + HInstruction* j = new (&allocator_) HParameterValue(graph_->GetDexFile(), + 
dex::TypeIndex(1), + 1, + Primitive::kPrimInt); + HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + Primitive::kPrimNot); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* add0 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c0); + HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c1); + HInstruction* sub0 = new (&allocator_) HSub(Primitive::kPrimInt, i, c0); + HInstruction* sub1 = new (&allocator_) HSub(Primitive::kPrimInt, i, c1); + HInstruction* arr_set_0 = new (&allocator_) HArraySet(arr, c0, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_1 = new (&allocator_) HArraySet(arr, c1, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_i = new (&allocator_) HArraySet(arr, i, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_add0 = new (&allocator_) HArraySet(arr, add0, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_add1 = new (&allocator_) HArraySet(arr, add1, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_sub0 = new (&allocator_) HArraySet(arr, sub0, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_sub1 = new (&allocator_) HArraySet(arr, sub1, c0, Primitive::kPrimInt, 0); + HInstruction* arr_set_j = new (&allocator_) HArraySet(arr, j, c0, Primitive::kPrimInt, 0); + HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object, + c1, + nullptr, + Primitive::kPrimInt, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + + HInstruction* block_instructions[] = {arr, + i, + j, + object, + add0, + add1, + sub0, + sub1, + arr_set_0, + arr_set_1, + arr_set_i, + arr_set_add0, + arr_set_add1, + arr_set_sub0, + arr_set_sub1, + arr_set_j, + set_field10}; + + for (HInstruction* instr : block_instructions) { + entry->AddInstruction(instr); + } + + SchedulingGraph scheduling_graph(scheduler, graph_->GetArena()); + HeapLocationCollector 
heap_location_collector(graph_); + heap_location_collector.VisitBasicBlock(entry); + heap_location_collector.BuildAliasingMatrix(); + scheduling_graph.SetHeapLocationCollector(heap_location_collector); + + for (HInstruction* instr : ReverseRange(block_instructions)) { + // Build scheduling graph with memory access aliasing information + // from LSA/heap_location_collector. + scheduling_graph.AddNode(instr); + } + + // LSA/HeapLocationCollector should see those ArraySet instructions. + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 9U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's aliasing matrix after load store analysis. + // HeapLocationCollector and SchedulingGraph should report consistent relationships. + size_t loc1 = HeapLocationCollector::kHeapLocationNotFound; + size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; + + // Test side effect dependency: array[0] and array[1] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, c0); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, c1); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0)); + + // Test side effect dependency based on LSA analysis: array[i] and array[j] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, j); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + + // Test side effect dependency based on LSA analysis: array[i] and array[i+0] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add0); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i)); + + // 
Test side effect dependency based on LSA analysis: array[i] and array[i-0] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub0); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); + + // Test side effect dependency based on LSA analysis: array[i] and array[i+1] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i)); + + // Test side effect dependency based on LSA analysis: array[i+1] and array[i-1] + loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1); + loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub1); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1)); + + // Test side effect dependency based on LSA analysis: array[j] and all others array accesses + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub0)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add1)); + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub1)); + + // Test that ArraySet and FieldSet should not have side effect dependency + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_i, set_field10)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, set_field10)); + + // Exercise target specific scheduler and SchedulingLatencyVisitor. 
+ scheduler->Schedule(graph_); + } + ArenaPool pool_; ArenaAllocator allocator_; HGraph* graph_; @@ -204,15 +346,28 @@ TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); TestBuildDependencyGraphAndSchedule(&scheduler); } + +TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); + TestDependencyGraphOnAliasingArrayAccesses(&scheduler); +} #endif #if defined(ART_ENABLE_CODEGEN_arm) -TEST_F(SchedulerTest, DependencyGrapAndSchedulerARM) { +TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor); TestBuildDependencyGraphAndSchedule(&scheduler); } + +TEST_F(SchedulerTest, ArrayAccessAliasingARM) { + CriticalPathSchedulingNodeSelector critical_path_selector; + arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); + arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor); + TestDependencyGraphOnAliasingArrayAccesses(&scheduler); +} #endif TEST_F(SchedulerTest, RandomScheduling) { |