Enable support of VecLoad/VecStore in LSE
Changes:
- Enable VecLoad and VecStore support in LSE.
- This CL is based on Mingyao's CL: More general store elimination.
- The new gtest load_store_elimination_test covers corner cases where
  ArrayGet/ArraySet/VecLoad/VecStore accesses are mixed and overlap
  (a sketch of such a pattern follows below).
- The new Java run-test 530-checker-lse-simd exercises LSE on SIMD code.
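As an illustration, a minimal sketch of the kind of pattern these tests target
(hypothetical Java, not the actual test sources; names are made up):

  // The loop is expected to be auto-vectorized into VecLoad/VecStore,
  // while the accesses outside it stay scalar ArraySet/ArrayGet.
  static int mixedAccesses(int[] a, int base) {
    a[base] = 1;                      // scalar ArraySet
    for (int i = 0; i < a.length; i++) {
      a[i] += 2;                      // VecLoad + VecStore once vectorized
    }
    return a[base];                   // scalar ArrayGet overlapping the vector
                                      // stores; LSE must not forward the value 1
  }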
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Test: load_store_elimination_test
Test: 530-checker-lse-simd
Test: ./art/test/run-test --optimizing --64 --gcstress 667-checker-simd-alignment
Test: m -j80 art-check-testing-apex-gen
Change-Id: I2d2024ec75a2aaef56b527db98abb40c5f16be79
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index b33d0f4..4c150da 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -23,8 +23,6 @@
#include "load_store_analysis.h"
#include "side_effects_analysis.h"
-#include <iostream>
-
/**
* The general algorithm of load-store elimination (LSE).
* Load-store analysis in the previous pass collects a list of heap locations
@@ -64,8 +62,9 @@
* all the heap values, depending on the instruction's side effects.
* - Finalizable objects are considered as persisting at method
* return/deoptimization.
- * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad
- * and VecStore instructions.
+ * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
+ * access among ArrayGet/ArraySet/VecLoad/VecStore that may partially overlap
+ * another is treated as aliasing, and no load/store is eliminated in that case.
* - Currently this LSE algorithm doesn't handle graph with try-catch, due to
* the special block merging structure.
*/
@@ -172,9 +171,7 @@
DCHECK(substitute2->IsTypeConversion());
continue;
}
- DCHECK(load2->IsInstanceFieldGet() ||
- load2->IsStaticFieldGet() ||
- load2->IsArrayGet());
+ DCHECK(IsLoad(load2));
DCHECK(substitute2 != nullptr);
if (substitute2 == substitute &&
load2->GetType() == load->GetType() &&
@@ -204,9 +201,7 @@
DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion());
continue;
}
- DCHECK(load->IsInstanceFieldGet() ||
- load->IsStaticFieldGet() ||
- load->IsArrayGet());
+ DCHECK(IsLoad(load));
HInstruction* substitute = substitute_instructions_for_loads_[i];
DCHECK(substitute != nullptr);
// We proactively retrieve the substitute for a removed load, so
@@ -224,7 +219,7 @@
// We guarantee that type A stored as type B and then fetched out as
// type C is the same as casting from type A to type C directly, since
// type B and type C will have the same size which is guaranteed in
- // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType().
+ // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
// So we only need one type conversion from type A to type C.
HTypeConversion* type_conversion = AddTypeConversionIfNecessary(
load, substitute, load->GetType());
@@ -240,7 +235,7 @@
// At this point, stores in possibly_removed_stores_ can be safely removed.
for (HInstruction* store : possibly_removed_stores_) {
- DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+ DCHECK(IsStore(store));
store->GetBlock()->RemoveInstruction(store);
}
@@ -261,26 +256,37 @@
}
private:
- static bool IsLoad(HInstruction* instruction) {
+ static bool IsLoad(const HInstruction* instruction) {
if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
return false;
}
// Unresolved load is not treated as a load.
return instruction->IsInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
+ instruction->IsVecLoad() ||
instruction->IsArrayGet();
}
- static bool IsStore(HInstruction* instruction) {
+ static bool IsStore(const HInstruction* instruction) {
if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
return false;
}
// Unresolved store is not treated as a store.
return instruction->IsInstanceFieldSet() ||
instruction->IsArraySet() ||
+ instruction->IsVecStore() ||
instruction->IsStaticFieldSet();
}
+ // Check if it is allowed to use default values for the specified load.
+ static bool IsDefaultAllowedForLoad(const HInstruction* load) {
+ DCHECK(IsLoad(load));
+ // Using defaults for VecLoads requires creating additional vector operations.
+ // As there are some issues with scheduling vector operations, it is better to
+ // avoid creating them.
+ return !load->IsVecOperation();
+ }
+
// Returns the real heap value by finding its substitute or by "peeling"
// a store instruction.
HInstruction* GetRealHeapValue(HInstruction* heap_value) {
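As context for IsDefaultAllowedForLoad above, a hedged source-level sketch
(hypothetical Java, made-up names) of why default-value substitution is skipped
for vector loads: a scalar load from freshly allocated memory can simply be
replaced by the constant 0, while doing the same for a VecLoad would require
materializing an extra default vector, which this change deliberately avoids.

  // Reads from a newly allocated, all-default array.
  static int readDefaults() {
    int[] a = new int[64];
    int first = a[0];        // scalar ArrayGet: LSE can substitute constant 0
    int sum = 0;
    for (int i = 0; i < a.length; i++) {
      sum += a[i];           // if vectorized into VecLoad, substituting a default
    }                        // would need an extra zero-vector operation, so the
                             // VecLoad is kept and the heap value becomes unknown
    return first + sum;
  }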
@@ -298,6 +304,8 @@
heap_value = heap_value->AsInstanceFieldSet()->GetValue();
} else if (heap_value->IsStaticFieldSet()) {
heap_value = heap_value->AsStaticFieldSet()->GetValue();
+ } else if (heap_value->IsVecStore()) {
+ heap_value = heap_value->AsVecStore()->GetValue();
} else {
DCHECK(heap_value->IsArraySet());
heap_value = heap_value->AsArraySet()->GetValue();
@@ -553,10 +561,15 @@
heap_values_for_[instruction->GetBlock()->GetBlockId()];
HInstruction* heap_value = heap_values[idx];
if (heap_value == kDefaultHeapValue) {
- HInstruction* constant = GetDefaultValue(instruction->GetType());
- AddRemovedLoad(instruction, constant);
- heap_values[idx] = constant;
- return;
+ if (IsDefaultAllowedForLoad(instruction)) {
+ HInstruction* constant = GetDefaultValue(instruction->GetType());
+ AddRemovedLoad(instruction, constant);
+ heap_values[idx] = constant;
+ return;
+ } else {
+ heap_values[idx] = kUnknownHeapValue;
+ heap_value = kUnknownHeapValue;
+ }
}
heap_value = GetRealHeapValue(heap_value);
if (heap_value == kUnknownHeapValue) {
@@ -590,6 +603,35 @@
return false;
}
+ bool CanValueBeKeptIfSameAsNew(HInstruction* value,
+ HInstruction* new_value,
+ HInstruction* new_value_set_instr) {
+ // For scalar field/array set operations, if the value is the same as the new_value,
+ // it can be kept even if aliasing happens: all aliased operations access the same
+ // memory range.
+ // For vector values, this is not true. For example:
+ // packed_data = [0xA, 0xB, 0xC, 0xD]; <-- Different values in each lane.
+ // VecStore array[i ,i+1,i+2,i+3] = packed_data;
+ // VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
+ // VecLoad vx = array[i,i+1,i+2,i+3]; <-- Cannot be eliminated because the value
+ // here is not packed_data anymore.
+ //
+ // TODO: to allow this 'same value' optimization on vector data,
+ // LSA needs to report more fine-grained MAY-alias information:
+ // (1) May alias because two vector accesses partially overlap,
+ // e.g. a[i..i+3] and a[i+1..i+4].
+ // (2) May alias because two vector accesses may completely overlap each other,
+ // e.g. a[i..i+3] and b[i..i+3].
+ // (3) May alias, but the exact relationship between the two locations is unknown,
+ // e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
+ // The 'same value' optimization can be applied only in case (2).
+ if (new_value_set_instr->IsVecOperation()) {
+ return false;
+ }
+
+ return Equal(value, new_value);
+ }
+
void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
DCHECK(!IsStore(value)) << value->DebugName();
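To complement the comment in CanValueBeKeptIfSameAsNew above, a hypothetical
source-level sketch (made-up names) of the scalar case where the 'same value'
rule still applies, and why its vector counterpart does not:

  // The scalar 'same value' case.
  static int sameScalarValue(int[] a, int i, int j, int v) {
    a[i] = v;       // remembered heap value for a[i] is v
    a[j] = v;       // may alias a[i], but it stores the same scalar value,
                    // so the remembered value for a[i] stays valid
    return a[i];    // LSE can replace this load with v
  }
  // The vector analogue is unsafe: storing the same packed value at
  // a[i..i+3] and then at a[i+1..i+4] shifts the lanes, so a later
  // VecLoad of a[i..i+3] no longer observes the original packed value.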
@@ -636,23 +678,16 @@
// This store may kill values in other heap locations due to aliasing.
for (size_t i = 0; i < heap_values.size(); i++) {
- if (i == idx) {
+ if (i == idx ||
+ heap_values[i] == kUnknownHeapValue ||
+ CanValueBeKeptIfSameAsNew(heap_values[i], value, instruction) ||
+ !heap_location_collector_.MayAlias(i, idx)) {
continue;
}
- if (Equal(heap_values[i], value)) {
- // Same value should be kept even if aliasing happens.
- continue;
- }
- if (heap_values[i] == kUnknownHeapValue) {
- // Value is already unknown, no need for aliasing check.
- continue;
- }
- if (heap_location_collector_.MayAlias(i, idx)) {
- // Kill heap locations that may alias and as a result if the heap value
- // is a store, the store needs to be kept.
- KeepIfIsStore(heap_values[i]);
- heap_values[i] = kUnknownHeapValue;
- }
+ // Kill heap locations that may alias. As a result, if the heap value
+ // is a store, that store needs to be kept.
+ KeepIfIsStore(heap_values[i]);
+ heap_values[i] = kUnknownHeapValue;
}
}
@@ -689,7 +724,16 @@
void VisitArraySet(HArraySet* instruction) override {
size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
- VisitSetLocation(instruction, idx, instruction->InputAt(2));
+ VisitSetLocation(instruction, idx, instruction->GetValue());
+ }
+
+ void VisitVecLoad(HVecLoad* instruction) override {
+ VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
+ }
+
+ void VisitVecStore(HVecStore* instruction) override {
+ size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
+ VisitSetLocation(instruction, idx, instruction->GetValue());
}
void VisitDeoptimize(HDeoptimize* instruction) override {
@@ -892,11 +936,6 @@
return false;
}
- // TODO: analyze VecLoad/VecStore better.
- if (graph_->HasSIMD()) {
- return false;
- }
-
LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
lse_visitor.VisitBasicBlock(block);