Revert^4 "Partial Load Store Elimination"
This reverts commit 791df7a161ecfa28eb69862a4bc285282463b960.
This unreverts commit fc1ce4e8be0d977e3d41699f5ec746d68f63c024.
This unreverts commit b8686ce4c93eba7192ed7ef89e7ffd9f3aa6cd07.
We incorrectly failed to include PredicatedInstanceFieldGet in a few
conditions, including a DCHECK. This caused tests to fail under the
read-barrier-table-lookup configuration.
Reason for revert: Fixed 2 incorrect checks
Bug: 67037140
Test: ./art/test/testrunner/run_build_test_target.py -j70 art-gtest-read-barrier-table-lookup
Change-Id: I32b01b29fb32077fb5074e7c77a0226bd1fcaab4
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2e0f2b1..17ce694 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -27,16 +27,23 @@
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
+#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "base/transform_iterator.h"
#include "escape.h"
#include "execution_subgraph.h"
+#include "handle.h"
#include "load_store_analysis.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
#include "nodes.h"
+#include "optimizing/execution_subgraph.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"
+#include "stack_map.h"
/**
* The general algorithm of load-store elimination (LSE).
@@ -57,6 +64,9 @@
* - In phase 4, we commit the changes, replacing loads marked for elimination
* in previous processing and removing stores not marked for keeping. We also
* remove allocations that are no longer needed.
+ * - In phase 5, we move allocations that escape only along some execution
+ * paths closer to their escape points and fix up the non-escaping paths with
+ * their actual values, creating PHIs when needed.
*
* 1. Walk over blocks and their instructions.
*
@@ -82,7 +92,9 @@
* to maintain the validity of all heap locations during the optimization
* phase, we only record substitutes at this phase and the real elimination
* is delayed till the end of LSE. Loads that require a loop Phi placeholder
- * replacement are recorded for processing later.
+ * replacement are recorded for processing later. We also keep track of the
+ * heap value at the start of each load so that partial LSE can later
+ * predicate the load.
* - If the instruction is a store, it updates the heap value for the heap
* location with the stored value and records the store itself so that we can
* mark it for keeping if the value becomes observable. Heap values are
@@ -228,7 +240,80 @@
* The time complexity of this phase is
* O(instructions + instruction_uses) .
*
- * FIXME: The time complexity described above assumes that the
+ * 5. Partial LSE
+ *
+ * Move allocations closer to their escapes and remove/predicate loads and
+ * stores as required.
+ *
+ * Partial singletons are objects which escape from the function, or acquire
+ * multiple names, only along certain execution paths. When we recognize such
+ * partial singletons we can move the allocation and initialization closer to
+ * the actual escape(s). We can then perform a simplified version of LSE step 2
+ * to determine the unescaped value of any reads performed after the object may
+ * have escaped. These values are used to replace such reads with
+ * 'predicated-read' instructions where the value is only read if the object
+ * has actually escaped. We use the existence of the object itself as the
+ * marker of whether escape has occurred.
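+ *
+ * As a rough illustration (the names and values below are invented for this
+ * comment, not taken from any real code), partial LSE targets Java-like
+ * shapes such as:
+ *
+ *   Foo o = new Foo();    // candidate partial singleton
+ *   o.x = 12;
+ *   if (cond) {
+ *     escape(o);          // 'o' escapes only along this path
+ *   }
+ *   return o.x;           // read after the object *may* have escaped
+ *
+ * Conceptually, the allocation is moved onto the escaping edge and the final
+ * read becomes roughly 'mat != null ? mat.x : 12', where 'mat' is the
+ * materialized object (null on paths where no escape happened).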
+ *
+ * There are several steps in this sub-pass:
+ *
+ * 5.1 Group references
+ *
+ * Since all heap-locations for a single reference escape at the same time, we
+ * need to group the heap-locations by reference and process them together.
+ *
+ * O(heap_locations).
+ *
+ * FIXME: The time complexity above assumes we can bucket the heap-locations in
+ * O(1), which is not true since we just perform a linear scan of the heap-ref
+ * list. Since there are generally only a small number of heap-references which
+ * are partial-singletons this is fine and has lower real overhead than a hash map.
+ *
+ * 5.2 Generate materializations
+ *
+ * Once we have the references we add new 'materialization blocks' on the edges
+ * where escape becomes inevitable. This information is calculated from the
+ * execution-subgraphs created during load-store-analysis. We create new
+ * materializations in these blocks and initialize them with the value of
+ * each heap-location, ignoring side effects (since the object hasn't escaped
+ * yet). In the worst case this has the same time-complexity as step 3 since we
+ * may need to materialize phis.
+ *
+ * O(heap_locations^2 * materialization_edges)
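+ *
+ * Continuing the illustration above, the materialization block is created on
+ * the edge leading into the escaping branch; a hypothetical rendering of its
+ * contents would be:
+ *
+ *   // new block on the edge entering the 'escape(o)' branch
+ *   Foo mat = new Foo();  // cloned allocation
+ *   mat.x = 12;           // initialized from the tracked heap value
+ *
+ * The initializing stores use the heap values tracked for each location; side
+ * effects can be ignored because the object has not escaped before this point.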
+ *
+ * 5.3 Propagate materializations
+ *
+ * Since we use the materialization as the marker for escape we need to
+ * propagate it throughout the graph. Since the subgraph analysis considers any
+ * lifetime that escapes a loop (and hence would require a loop-phi) to be
+ * escaping at the loop-header we do not need to create any loop-phis to do
+ * this.
+ *
+ * O(edges)
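+ *
+ * In terms of the illustration above: at a block where an escaping and a
+ * non-escaping path merge, the marker conceptually becomes
+ * 'Foo mat = phi(materialized_foo, null)', so any later predicated access can
+ * simply test 'mat != null' to find out whether the escape happened.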
+ *
+ * NB: Currently the subgraph analysis considers all objects to have their
+ * lifetimes start at the entry block. This simplifies that analysis enormously
+ * but means that we cannot distinguish between an escape in a loop where the
+ * lifetime does not escape the loop (in which case this pass could optimize)
+ * and one where it does escape the loop (in which case the whole loop is
+ * escaping). This is a shortcoming that would be good to fix at some point.
+ *
+ * 5.4 Propagate partial values
+ *
+ * We need to replace loads and stores to the partial reference with predicated
+ * ones that have default non-escaping values. Again this is the same as step 3.
+ *
+ * O(heap_locations^2 * edges)
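+ *
+ * Using the hypothetical names from the illustration above, accesses after the
+ * potential escape are rewritten roughly as:
+ *
+ *   // read: fall back to the tracked partial value when no escape happened
+ *   int v = (mat != null) ? mat.x : 12;   // PredicatedInstanceFieldGet
+ *   // write: only performed when the object was actually materialized
+ *   if (mat != null) { mat.x = v; }       // predicated InstanceFieldSet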
+ *
+ * 5.5 Final fixup
+ *
+ * Now all we need to do is replace or remove the remaining uses of the old
+ * reference, substituting the appropriate materialization where required.
+ *
+ * O(instructions + uses)
+ *
+ * FIXME: The time complexities described above assume that the
* HeapLocationCollector finds a heap location for an instruction in O(1)
* time but it is currently O(heap_locations); this can be fixed by adding
* a hash map to the HeapLocationCollector.
@@ -236,11 +321,18 @@
namespace art {
+#define LSE_VLOG \
+ if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
+
+class PartialLoadStoreEliminationHelper;
+class HeapRefHolder;
+
// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
public:
LSEVisitor(HGraph* graph,
const HeapLocationCollector& heap_location_collector,
+ bool perform_partial_lse,
OptimizingCompilerStats* stats);
void Run();
@@ -278,6 +370,45 @@
uint32_t heap_location_;
};
+ struct Marker {};
+
+ class Value;
+
+ class PriorValueHolder {
+ public:
+ constexpr explicit PriorValueHolder(Value prior);
+
+ constexpr bool IsInstruction() const {
+ return std::holds_alternative<HInstruction*>(value_);
+ }
+ constexpr bool IsPhi() const {
+ return std::holds_alternative<PhiPlaceholder>(value_);
+ }
+ constexpr bool IsDefault() const {
+ return std::holds_alternative<Marker>(value_);
+ }
+ constexpr PhiPlaceholder GetPhiPlaceholder() const {
+ DCHECK(IsPhi());
+ return std::get<PhiPlaceholder>(value_);
+ }
+ constexpr HInstruction* GetInstruction() const {
+ DCHECK(IsInstruction());
+ return std::get<HInstruction*>(value_);
+ }
+
+ Value ToValue() const;
+ void Dump(std::ostream& oss) const;
+
+ constexpr bool Equals(PriorValueHolder other) const {
+ return value_ == other.value_;
+ }
+
+ private:
+ std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
+ };
+
+ friend constexpr bool operator==(const Marker&, const Marker&);
+ friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);
@@ -310,6 +441,14 @@
return Value(ValuelessType::kPureUnknown);
}
+ static constexpr Value PartialUnknown(Value old_value) {
+ if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
+ return PureUnknown();
+ } else {
+ return Value(PriorValueHolder(old_value));
+ }
+ }
+
static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
return Value(MergedUnknownMarker{phi_placeholder});
}
@@ -346,6 +485,10 @@
GetValuelessType() == ValuelessType::kInvalid;
}
+ bool IsPartialUnknown() const {
+ return std::holds_alternative<PriorValueHolder>(value_);
+ }
+
bool IsMergedUnknown() const {
return std::holds_alternative<MergedUnknownMarker>(value_);
}
@@ -356,7 +499,7 @@
}
bool IsUnknown() const {
- return IsPureUnknown() || IsMergedUnknown();
+ return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
}
bool IsDefault() const {
@@ -381,10 +524,15 @@
}
HInstruction* GetInstruction() const {
- DCHECK(IsInstruction());
+ DCHECK(IsInstruction()) << *this;
return std::get<HInstruction*>(value_);
}
+ PriorValueHolder GetPriorValue() const {
+ DCHECK(IsPartialUnknown());
+ return std::get<PriorValueHolder>(value_);
+ }
+
PhiPlaceholder GetPhiPlaceholder() const {
DCHECK(NeedsPhi() || IsMergedUnknown());
if (NeedsNonLoopPhi()) {
@@ -402,7 +550,7 @@
}
HBasicBlock* GetMergeBlock(const HGraph* graph) const {
- DCHECK(IsMergedUnknown()) << this;
+ DCHECK(IsMergedUnknown()) << *this;
return graph->GetBlocks()[GetMergeBlockId()];
}
@@ -411,6 +559,8 @@
return GetPhiPlaceholder().GetHeapLocation();
}
+ constexpr bool ExactEquals(Value other) const;
+
constexpr bool Equals(Value other) const;
constexpr bool Equals(HInstruction* instruction) const {
@@ -427,7 +577,8 @@
HInstruction*,
MergedUnknownMarker,
NeedsNonLoopPhiMarker,
- NeedsLoopPhiMarker>;
+ NeedsLoopPhiMarker,
+ PriorValueHolder>;
constexpr ValuelessType GetValuelessType() const {
return std::get<ValuelessType>(value_);
}
@@ -493,7 +644,9 @@
}
Value Replacement(Value value) const {
- DCHECK(value.NeedsPhi());
+ DCHECK(value.NeedsPhi() ||
+ (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
+ << value << " phase: " << current_phase_;
Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
DCHECK(replacement.IsUnknown() ||
@@ -502,6 +655,16 @@
}
Value ReplacementOrValue(Value value) const {
+ if (current_phase_ == Phase::kPartialElimination) {
+ if (value.IsPartialUnknown()) {
+ value = value.GetPriorValue().ToValue();
+ }
+ if (value.IsMergedUnknown()) {
+ return phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()
+ ? Replacement(value)
+ : Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
+ }
+ }
if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
return Replacement(value);
} else {
@@ -598,6 +761,7 @@
static bool IsLoad(HInstruction* instruction) {
// Unresolved load is not treated as a load.
return instruction->IsInstanceFieldGet() ||
+ instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsVecLoad() ||
instruction->IsArrayGet();
@@ -623,7 +787,7 @@
// Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
// This is necessary if the stored heap value can be observed.
void KeepStores(Value value) {
- if (value.IsPureUnknown()) {
+ if (value.IsPureUnknown() || value.IsPartialUnknown()) {
return;
}
if (value.IsMergedUnknown()) {
@@ -743,12 +907,16 @@
void VisitGetLocation(HInstruction* instruction, size_t idx);
void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
+ void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
+ field_infos_[heap_loc] = info;
+ }
void VisitBasicBlock(HBasicBlock* block) override;
enum class Phase {
kLoadElimination,
- kStoreElimination
+ kStoreElimination,
+ kPartialElimination,
};
bool TryReplacingLoopPhiPlaceholderWithDefault(
@@ -765,8 +933,10 @@
bool can_use_default_or_phi);
bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
DataType::Type type);
+ bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
DataType::Type type);
+ bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
HInstruction* load);
void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
@@ -776,6 +946,22 @@
void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
void FindStoresWritingOldValues();
+ void FinishFullLSE();
+ void PrepareForPartialPhiComputation();
+ // Create materialization block and materialization object for the given predecessor of entry.
+ HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
+ HeapRefHolder&& holder,
+ size_t pred_idx,
+ HBasicBlock* blk);
+ // Returns the value that would be read by the 'read' instruction on
+ // 'orig_new_inst' if 'orig_new_inst' has not escaped.
+ HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
+ void MovePartialEscapes();
+
+ void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
+ LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
+ << " but LSE should be the only source of predicated-ifield-gets!";
+ }
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
HInstruction* object = instruction->InputAt(0);
@@ -914,7 +1100,7 @@
}
if (side_effects.DoesAnyWrite()) {
// The value may be clobbered.
- heap_values[i].value = Value::PureUnknown();
+ heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
}
}
}
@@ -1010,6 +1196,12 @@
}
}
+ bool ShouldPerformPartialLSE() const {
+ return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
+ }
+
+ bool perform_partial_lse_;
+
const HeapLocationCollector& heap_location_collector_;
// Use local allocator for allocating memory.
@@ -1035,6 +1227,12 @@
// in the end. These are indexed by the load's id.
ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;
+ // Value at the start of the given instruction for instructions which directly
+ // read from a heap-location (i.e. FieldGet). The mapping to heap-location is
+ // implicit through the fact that each instruction can only directly refer to
+ // a single heap-location.
+ ScopedArenaHashMap<HInstruction*, Value> intermediate_values_;
+
// Record stores to keep in a bit vector indexed by instruction ID.
ArenaBitVector kept_stores_;
// When we need to keep all stores that feed a Phi placeholder, we just record the
@@ -1063,23 +1261,70 @@
ScopedArenaVector<HInstruction*> singleton_new_instances_;
+ // The field infos for each heap location (if relevant).
+ ScopedArenaVector<const FieldInfo*> field_infos_;
+
Phase current_phase_;
+ friend class PartialLoadStoreEliminationHelper;
+ friend struct ScopedRestoreHeapValues;
+
friend std::ostream& operator<<(std::ostream& os, const Value& v);
- friend std::ostream& operator<<(std::ostream& os, const Phase& phase);
+ friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
+ friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
+std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
+ p.Dump(oss);
+ return oss;
+}
+
std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
switch (phase) {
case LSEVisitor::Phase::kLoadElimination:
return oss << "kLoadElimination";
case LSEVisitor::Phase::kStoreElimination:
return oss << "kStoreElimination";
+ case LSEVisitor::Phase::kPartialElimination:
+ return oss << "kPartialElimination";
}
}
+void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
+ if (IsDefault()) {
+ oss << "Default";
+ } else if (IsPhi()) {
+ oss << "Phi: " << GetPhiPlaceholder();
+ } else {
+ oss << "Instruction: " << *GetInstruction();
+ }
+}
+
+constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val)
+ : value_(Marker{}) {
+ DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
+ if (val.IsPartialUnknown()) {
+ value_ = val.GetPriorValue().value_;
+ } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
+ value_ = val.GetPhiPlaceholder();
+ } else if (val.IsInstruction()) {
+ value_ = val.GetInstruction();
+ } else {
+ DCHECK(val.IsDefault());
+ }
+}
+
+constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
+ return true;
+}
+
+constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
+ const LSEVisitor::PriorValueHolder& p2) {
+ return p1.Equals(p2);
+}
+
constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
const LSEVisitor::PhiPlaceholder& p2) {
return p1.Equals(p2);
@@ -1105,6 +1350,20 @@
return oss;
}
+LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
+ if (IsDefault()) {
+ return Value::Default();
+ } else if (IsPhi()) {
+ return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
+ } else {
+ return Value::ForInstruction(GetInstruction());
+ }
+}
+
+constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
+ return value_ == other.value_;
+}
+
constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
// Only valid values can be compared.
DCHECK(IsValid());
@@ -1129,6 +1388,8 @@
case ValuelessType::kInvalid:
return os << "Invalid";
}
+ } else if (IsPartialUnknown()) {
+ return os << "PartialUnknown[" << GetPriorValue() << "]";
} else if (IsInstruction()) {
return os << "Instruction[id: " << GetInstruction()->GetId()
<< ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
@@ -1151,8 +1412,10 @@
LSEVisitor::LSEVisitor(HGraph* graph,
const HeapLocationCollector& heap_location_collector,
+ bool perform_partial_lse,
OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph, stats),
+ perform_partial_lse_(perform_partial_lse),
heap_location_collector_(heap_location_collector),
allocator_(graph->GetArenaStack()),
num_phi_placeholders_(GetGraph()->GetBlocks().size() *
@@ -1166,13 +1429,14 @@
substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
nullptr,
allocator_.Adapter(kArenaAllocLSE)),
+ intermediate_values_(allocator_.Adapter(kArenaAllocLSE)),
kept_stores_(&allocator_,
- /*start_bits=*/ graph->GetCurrentInstructionId(),
- /*expandable=*/ false,
+ /*start_bits=*/graph->GetCurrentInstructionId(),
+ /*expandable=*/false,
kArenaAllocLSE),
phi_placeholders_to_search_for_kept_stores_(&allocator_,
num_phi_placeholders_,
- /*expandable=*/ false,
+ /*expandable=*/false,
kArenaAllocLSE),
loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
store_records_(allocator_.Adapter(kArenaAllocLSE)),
@@ -1180,10 +1444,12 @@
Value::Invalid(),
allocator_.Adapter(kArenaAllocLSE)),
kept_merged_unknowns_(&allocator_,
- /*start_bits=*/ num_phi_placeholders_,
- /*expandable=*/ false,
+ /*start_bits=*/num_phi_placeholders_,
+ /*expandable=*/false,
kArenaAllocLSE),
singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
+ field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
+ allocator_.Adapter(kArenaAllocLSE)),
current_phase_(Phase::kLoadElimination) {
// Clear bit vectors.
phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
@@ -1249,9 +1515,13 @@
// Don't eliminate loads in irreducible loops.
if (block->GetLoopInformation()->IsIrreducible()) {
heap_values.resize(num_heap_locations,
- {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
+ {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
// Also keep the stores before the loop header, including in blocks that were not visited yet.
+ bool is_osr = GetGraph()->IsCompilingOsr();
for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
+ heap_values[idx].value =
+ is_osr ? Value::PureUnknown()
+ : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
}
return;
@@ -1410,9 +1680,10 @@
phi_inputs.clear();
for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
- DCHECK(!pred_value.IsUnknown())
- << "block " << current_block->GetBlockId() << " pred: " << predecessor->GetBlockId();
- if (pred_value.NeedsNonLoopPhi()) {
+ DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
+ << " pred: " << predecessor->GetBlockId();
+ if (pred_value.NeedsNonLoopPhi() ||
+ (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
// We need to process the Phi placeholder first.
work_queue.push_back(pred_value.GetPhiPlaceholder());
} else if (pred_value.IsDefault()) {
@@ -1439,7 +1710,17 @@
uint32_t block_id = instruction->GetBlock()->GetBlockId();
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
ValueRecord& record = heap_values[idx];
+ if (instruction->IsFieldAccess()) {
+ RecordFieldInfo(&instruction->GetFieldInfo(), idx);
+ }
DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
+ // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
+ // value.
+ DCHECK(!record.value.IsPureUnknown() ||
+ heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
+ !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
+ << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
+ intermediate_values_.insert({instruction, record.value});
loads_and_stores_.push_back({ instruction, idx });
if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
!IsDefaultOrPhiAllowedForLoad(instruction)) {
@@ -1475,6 +1756,9 @@
void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
DCHECK(!IsStore(value)) << value->DebugName();
+ if (instruction->IsFieldAccess()) {
+ RecordFieldInfo(&instruction->GetFieldInfo(), idx);
+ }
// value may already have a substitute.
value = FindSubstitute(value);
HBasicBlock* block = instruction->GetBlock();
@@ -1533,7 +1817,7 @@
// Kill heap locations that may alias and keep previous stores to these locations.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
- heap_values[i].value = Value::PureUnknown();
+ heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
}
}
@@ -1753,6 +2037,8 @@
if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
work_queue.push_back(value.GetPhiPlaceholder());
+ LSE_VLOG << "For materialization of " << phi_placeholder
+ << " we need to materialize " << value;
}
}
}
@@ -1764,6 +2050,11 @@
bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
DataType::Type type) {
+ return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
+}
+
+bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
+ DataType::Type type) {
// Materialize all predecessors that do not need a loop Phi and determine if all inputs
// other than loop Phis are the same.
const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
@@ -1775,8 +2066,11 @@
size_t idx = phi_placeholder.GetHeapLocation();
for (HBasicBlock* predecessor : block->GetPredecessors()) {
Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
- if (value.NeedsNonLoopPhi()) {
- DCHECK(current_phase_ == Phase::kLoadElimination);
+ if (value.NeedsNonLoopPhi() ||
+ (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown())) {
+ DCHECK(current_phase_ == Phase::kLoadElimination ||
+ current_phase_ == Phase::kPartialElimination)
+ << current_phase_;
MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
value = Replacement(value);
}
@@ -2001,6 +2295,15 @@
return true;
}
+bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
+ ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
+ ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
+ auto res =
+ FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi=*/true);
+ CHECK(!res.has_value()) << *res;
+ return MaterializeLoopPhis(abv, type);
+}
+
std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
PhiPlaceholder phi_placeholder, HInstruction* load) {
DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
@@ -2144,7 +2447,7 @@
// propagated as a value to this load) and store the load as the new heap value.
found_unreplaceable_load = true;
KeepStores(record.value);
- record.value = Value::PureUnknown();
+ record.value = Value::MergedUnknown(record.value.GetPhiPlaceholder());
local_heap_values[idx] = Value::ForInstruction(load_or_store);
} else if (local_heap_values[idx].NeedsLoopPhi()) {
// The load may still be replaced with a Phi later.
@@ -2386,7 +2689,57 @@
!success);
}
+struct ScopedRestoreHeapValues {
+ public:
+ ScopedRestoreHeapValues(ArenaStack* alloc,
+ size_t num_heap_locs,
+ ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
+ : alloc_(alloc),
+ updated_values_(alloc_.Adapter(kArenaAllocLSE)),
+ to_restore_(to_restore) {
+ updated_values_.reserve(num_heap_locs * to_restore_.size());
+ }
+
+ ~ScopedRestoreHeapValues() {
+ for (const auto& rec : updated_values_) {
+ to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
+ }
+ }
+
+ template<typename Func>
+ void ForEachRecord(Func func) {
+ for (size_t blk_id : Range(to_restore_.size())) {
+ for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
+ LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
+ LSEVisitor::Value initial = vr->value;
+ func(vr);
+ if (!vr->value.ExactEquals(initial)) {
+ updated_values_.push_back({blk_id, heap_loc, initial});
+ }
+ }
+ }
+ }
+
+ private:
+ struct UpdateRecord {
+ size_t blk_id;
+ size_t heap_loc;
+ LSEVisitor::Value val_;
+ };
+ ScopedArenaAllocator alloc_;
+ ScopedArenaVector<UpdateRecord> updated_values_;
+ ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
+};
+
void LSEVisitor::FindStoresWritingOldValues() {
+ // Partial LSE relies on knowing the real heap-values not the
+ // store-replacement versions so we need to restore the map after removing
+ // stores.
+ ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
+ heap_location_collector_.GetNumberOfHeapLocations(),
+ heap_values_for_);
// The Phi placeholder replacements have so far been used for eliminating loads,
// tracking values that would be stored if all stores were kept. As we want to
// compare actual old values after removing unmarked stores, prune the Phi
@@ -2401,10 +2754,14 @@
}
// Update heap values at end of blocks.
- for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
- for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
- UpdateValueRecordForStoreElimination(&value_record);
- }
+ heap_vals.ForEachRecord([&](ValueRecord* rec) {
+ UpdateValueRecordForStoreElimination(rec);
+ });
+
+ if (kIsDebugBuild) {
+ heap_vals.ForEachRecord([](ValueRecord* rec) {
+ DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
+ });
}
// Use local allocator to reduce peak memory usage.
@@ -2458,7 +2815,903 @@
FindStoresWritingOldValues();
// 4. Replace loads and remove unnecessary stores and singleton allocations.
+ FinishFullLSE();
+ // 5. Move partial escapes down and fixup with PHIs.
+ current_phase_ = Phase::kPartialElimination;
+ MovePartialEscapes();
+}
+
+// Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to
+// retry all of them with more information about where they come from.
+void LSEVisitor::PrepareForPartialPhiComputation() {
+ std::replace_if(
+ phi_placeholder_replacements_.begin(),
+ phi_placeholder_replacements_.end(),
+ [](const Value& val) { return val.IsPureUnknown(); },
+ Value::Invalid());
+}
+
+class PartialLoadStoreEliminationHelper {
+ public:
+ PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
+ : lse_(lse),
+ alloc_(alloc),
+ new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
+ heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
+ max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
+ GetGraph()->GetActiveBlocks().end(),
+ [](HBasicBlock* a, HBasicBlock* b) {
+ return a->GetNumberOfPredecessors() <
+ b->GetNumberOfPredecessors();
+ }))
+ ->GetNumberOfPredecessors()),
+ materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
+ nullptr,
+ alloc_->Adapter(kArenaAllocLSE)),
+ first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
+ heap_refs_.reserve(lse_->heap_location_collector_.GetNumberOfReferenceInfos());
+ new_ref_phis_.reserve(lse_->heap_location_collector_.GetNumberOfReferenceInfos() *
+ GetGraph()->GetBlocks().size());
+ CollectInterestingHeapRefs();
+ }
+
+ ~PartialLoadStoreEliminationHelper() {
+ if (heap_refs_.empty()) {
+ return;
+ }
+ ReferenceTypePropagation rtp_fixup(GetGraph(),
+ Handle<mirror::ClassLoader>(),
+ Handle<mirror::DexCache>(),
+ /* is_first_run= */ false);
+ rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
+ GetGraph()->ClearLoopInformation();
+ GetGraph()->ClearDominanceInformation();
+ GetGraph()->ClearReachabilityInformation();
+ GetGraph()->BuildDominatorTree();
+ GetGraph()->ComputeReachabilityInformation();
+ }
+
+ class IdxToHeapLoc {
+ public:
+ explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
+ HeapLocation* operator()(size_t idx) const {
+ return collector_->GetHeapLocation(idx);
+ }
+
+ private:
+ const HeapLocationCollector* collector_;
+ };
+
+
+ class HeapReferenceData {
+ public:
+ using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
+ HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
+ HNewInstance* new_inst,
+ const ExecutionSubgraph* subgraph,
+ ScopedArenaAllocator* alloc)
+ : new_instance_(new_inst),
+ helper_(helper),
+ heap_locs_(alloc,
+ helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
+ /* expandable= */ false,
+ kArenaAllocLSE),
+ materializations_(
+ // We generally won't need to create too many materialization blocks and we can expand
+ // this as needed so just start off with 2x.
+ 2 * helper->lse_->GetGraph()->GetBlocks().size(),
+ nullptr,
+ alloc->Adapter(kArenaAllocLSE)),
+ collector_(helper->lse_->heap_location_collector_),
+ subgraph_(subgraph) {}
+
+ LocIterator IterateLocations() {
+ auto idxs = heap_locs_.Indexes();
+ return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
+ }
+
+ void AddHeapLocation(size_t idx) {
+ heap_locs_.SetBit(idx);
+ }
+
+ const ExecutionSubgraph* GetNoEscapeSubgraph() const {
+ return subgraph_;
+ }
+
+ bool IsPostEscape(HBasicBlock* blk) {
+ return std::any_of(
+ subgraph_->GetExcludedCohorts().cbegin(),
+ subgraph_->GetExcludedCohorts().cend(),
+ [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
+ }
+
+ bool InEscapeCohort(HBasicBlock* blk) {
+ return std::any_of(
+ subgraph_->GetExcludedCohorts().cbegin(),
+ subgraph_->GetExcludedCohorts().cend(),
+ [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
+ }
+
+ bool BeforeAllEscapes(HBasicBlock* b) {
+ return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
+ subgraph_->GetExcludedCohorts().cend(),
+ [&](const ExecutionSubgraph::ExcludedCohort& ec) {
+ return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
+ });
+ }
+
+ HNewInstance* OriginalNewInstance() const {
+ return new_instance_;
+ }
+
+ // Collect and replace all uses. We need to perform this twice since we will
+ // generate PHIs and additional uses as we create the default-values for
+ // pred-gets. These values might be other references that are also being
+ // partially eliminated. By running just the replacement part again we are
+ // able to avoid having to keep another whole in-progress partial map
+ // around. Since we will have already handled all the other uses in the
+ // first pass the second one will be quite fast.
+ void FixupUses(bool first_pass) {
+ ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
+ // Replace uses with materialized values.
+ ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
+ ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
+ // Do we need to add a constructor-fence.
+ ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
+ saa.Adapter(kArenaAllocLSE));
+ ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
+
+ CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
+
+ if (!first_pass) {
+ // If another partial creates new references they can only be in Phis or pred-get defaults
+ // so they must be in the to_replace group.
+ DCHECK(to_predicate.empty());
+ DCHECK(constructor_fences.empty());
+ DCHECK(to_remove.empty());
+ }
+
+ ReplaceInput(to_replace);
+ RemoveInput(to_remove);
+ CreateConstructorFences(constructor_fences);
+ PredicateInstructions(to_predicate);
+
+ CHECK(OriginalNewInstance()->GetUses().empty())
+ << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
+ }
+
+ void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
+ if (blk->GetBlockId() >= materializations_.size()) {
+        // Make sure the materialization array is large enough; try to avoid
+        // re-sizing too many times by giving extra space.
+ materializations_.resize(blk->GetBlockId() * 2, nullptr);
+ }
+ DCHECK(materializations_[blk->GetBlockId()] == nullptr)
+ << "Already have a materialization in block " << blk->GetBlockId() << ": "
+ << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
+ << *ins;
+ materializations_[blk->GetBlockId()] = ins;
+ LSE_VLOG << "In block " << blk->GetBlockId() << " materialization is " << *ins;
+ helper_->NotifyNewMaterialization(ins);
+ }
+
+ bool HasMaterialization(HBasicBlock* blk) const {
+ return blk->GetBlockId() < materializations_.size() &&
+ materializations_[blk->GetBlockId()] != nullptr;
+ }
+
+ HInstruction* GetMaterialization(HBasicBlock* blk) const {
+ if (materializations_.size() <= blk->GetBlockId() ||
+ materializations_[blk->GetBlockId()] == nullptr) {
+ // This must be a materialization block added after the partial LSE of
+ // the current reference finished. Since every edge can only have at
+ // most one materialization block added to it we can just check the
+ // blocks predecessor.
+ DCHECK(helper_->IsMaterializationBlock(blk));
+ blk = helper_->FindDominatingNonMaterializationBlock(blk);
+ DCHECK(!helper_->IsMaterializationBlock(blk));
+ }
+ DCHECK_GT(materializations_.size(), blk->GetBlockId());
+ DCHECK(materializations_[blk->GetBlockId()] != nullptr);
+ return materializations_[blk->GetBlockId()];
+ }
+
+ void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
+ DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
+ GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
+ [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
+ return cohort.IsEntryBlock(blk);
+ }));
+ DCHECK(!HasMaterialization(blk));
+ if (blk->IsExitBlock()) {
+ return;
+ } else if (blk->IsLoopHeader()) {
+ // See comment in execution_subgraph.h. Currently we act as though every
+ // allocation for partial elimination takes place in the entry block.
+ // This simplifies the analysis by making it so any escape cohort
+ // expands to contain any loops it is a part of. This is something that
+ // we should rectify at some point. In either case however we can still
+ // special case the loop-header since (1) currently the loop can't have
+ // any merges between different cohort entries since the pre-header will
+ // be the earliest place entry can happen and (2) even if the analysis
+ // is improved to consider lifetime of the object WRT loops any values
+ // which would require loop-phis would have to make the whole loop
+ // escape anyway.
+ // This all means we can always use value from the pre-header when the
+ // block is the loop-header and we didn't already create a
+ // materialization block. (NB when we do improve the analysis we will
+ // need to modify the materialization creation code to deal with this
+ // correctly.)
+ HInstruction* pre_header_val =
+ GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
+ AddMaterialization(blk, pre_header_val);
+ return;
+ }
+ ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
+ ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
+ pred_vals.reserve(blk->GetNumberOfPredecessors());
+ for (HBasicBlock* pred : blk->GetPredecessors()) {
+ DCHECK(HasMaterialization(pred));
+ pred_vals.push_back(GetMaterialization(pred));
+ }
+ GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
+ }
+
+ void GenerateMaterializationValueFromPredecessorsForEntry(
+ HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
+ DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
+ GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
+ [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
+ return cohort.IsEntryBlock(entry);
+ }));
+ GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
+ }
+
+ private:
+ template <typename InstructionType>
+ struct InstructionUse {
+ InstructionType* instruction_;
+ size_t index_;
+ };
+
+ void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
+ for (auto& [ins, idx] : to_replace) {
+ HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
+ if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
+ // Phis we just pass through the appropriate inputs.
+ ins->ReplaceInput(merged_inst->InputAt(idx), idx);
+ } else {
+ ins->ReplaceInput(merged_inst, idx);
+ }
+ }
+ }
+
+ void RemoveInput(const ScopedArenaVector<HInstruction*>& to_remove) {
+ for (HInstruction* ins : to_remove) {
+ if (ins->GetBlock() == nullptr) {
+ // Already dealt with.
+ continue;
+ }
+ DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
+ if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
+ ins->GetBlock()->RemoveInstruction(ins);
+ } else {
+          // Can only be obj == other, obj != other, obj == obj (!?), or obj != obj (!?).
+ // Since PHIs are escapes as far as LSE is concerned and we are before
+ // any escapes these are the only 4 options.
+ DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
+ HInstruction* replacement;
+ if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
+ replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
+ : GetGraph()->GetIntConstant(0);
+ } else {
+ replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
+ : GetGraph()->GetIntConstant(1);
+ }
+ ins->ReplaceWith(replacement);
+ ins->GetBlock()->RemoveInstruction(ins);
+ }
+ }
+ }
+
+ void CreateConstructorFences(
+ const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
+ if (!constructor_fences.empty()) {
+ uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
+ for (auto& [cf, idx] : constructor_fences) {
+ if (cf->GetInputs().size() == 1) {
+ cf->GetBlock()->RemoveInstruction(cf);
+ } else {
+ cf->RemoveInputAt(idx);
+ }
+ }
+ for (const ExecutionSubgraph::ExcludedCohort& ec :
+ GetNoEscapeSubgraph()->GetExcludedCohorts()) {
+ for (HBasicBlock* blk : ec.EntryBlocks()) {
+ for (HBasicBlock* materializer :
+ Filter(MakeIterationRange(blk->GetPredecessors()),
+ [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
+ HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
+ GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
+ materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
+ }
+ }
+ }
+ }
+ }
+
+ void PredicateInstructions(
+ const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
+ for (auto& [ins, idx] : to_predicate) {
+ if (UNLIKELY(ins->GetBlock() == nullptr)) {
+ // Already handled due to obj == obj;
+ continue;
+ } else if (ins->IsInstanceFieldGet()) {
+ // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
+ HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
+ ins->AsInstanceFieldGet(),
+ GetMaterialization(ins->GetBlock()),
+ helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
+ MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
+ ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
+ if (ins->GetType() == DataType::Type::kReference) {
+ // Reference info is the same
+ new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
+ }
+ ins->ReplaceWith(new_fget);
+ ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
+ CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
+ << "Instruction: " << *ins << " uses: " << ins->GetUses()
+ << ", env: " << ins->GetEnvUses();
+ ins->GetBlock()->RemoveInstruction(ins);
+ } else if (ins->IsInstanceFieldSet()) {
+ // Any predicated sets shouldn't require movement.
+ ins->AsInstanceFieldSet()->SetIsPredicatedSet();
+ MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
+ HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
+ ins->ReplaceInput(merged_inst, idx);
+ } else {
+ // comparisons need to be split into 2.
+ DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
+ bool this_is_first = idx == 0;
+ if (ins->InputAt(0) == ins->InputAt(1)) {
+ // This is a obj == obj or obj != obj.
+ // No idea why anyone would do this but whatever.
+ ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
+ ins->GetBlock()->RemoveInstruction(ins);
+ continue;
+ } else {
+ HInstruction* is_escaped = new (GetGraph()->GetAllocator())
+ HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
+ HInstruction* combine_inst =
+ ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
+ DataType::Type::kBool, is_escaped, ins))
+ : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
+ DataType::Type::kBool, is_escaped, ins));
+ ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
+ ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
+ ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
+ ins->ReplaceWith(combine_inst);
+ combine_inst->ReplaceInput(ins, 1);
+ }
+ }
+ }
+ }
+
+ // Figure out all the instructions we need to
+ // fixup/replace/remove/duplicate. Since this requires an iteration of an
+ // intrusive linked list we want to do it only once and collect all the data
+ // here.
+ void CollectReplacements(
+ ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
+ ScopedArenaVector<HInstruction*>& to_remove,
+ ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
+ ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
+ size_t size = new_instance_->GetUses().SizeSlow();
+ to_replace.reserve(size);
+ to_remove.reserve(size);
+ constructor_fences.reserve(size);
+ to_predicate.reserve(size);
+ for (auto& use : new_instance_->GetUses()) {
+ HBasicBlock* blk =
+ helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
+ if (InEscapeCohort(blk)) {
+ LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
+ << *GetMaterialization(blk);
+ to_replace.push_back({use.GetUser(), use.GetIndex()});
+ } else if (IsPostEscape(blk)) {
+ LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
+ // The fields + cmp are normal uses. Phi can only be here if it was
+ // generated by full LSE so whatever store+load that created the phi
+ // is the escape.
+ if (use.GetUser()->IsPhi()) {
+ to_replace.push_back({use.GetUser(), use.GetIndex()});
+ } else {
+ DCHECK(use.GetUser()->IsFieldAccess() ||
+ use.GetUser()->IsEqual() ||
+ use.GetUser()->IsNotEqual())
+ << *use.GetUser() << "@" << use.GetIndex();
+ to_predicate.push_back({use.GetUser(), use.GetIndex()});
+ }
+ } else if (use.GetUser()->IsConstructorFence()) {
+ LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
+ constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
+ } else {
+ LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
+ to_remove.push_back(use.GetUser());
+ }
+ }
+ DCHECK_EQ(
+ to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
+ size);
+ }
+
+ void GenerateMaterializationValueFromPredecessorsDirect(
+ HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
+ DCHECK(!pred_vals.empty());
+ bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
+ return val == pred_vals.front();
+ });
+ if (LIKELY(all_equal)) {
+ AddMaterialization(blk, pred_vals.front());
+ } else {
+ // Make a PHI for the predecessors.
+ HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
+ GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
+ for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
+ phi->SetRawInputAt(off, ins);
+ }
+ blk->AddPhi(phi);
+ AddMaterialization(blk, phi);
+ }
+ }
+
+ HGraph* GetGraph() const {
+ return helper_->GetGraph();
+ }
+
+ HNewInstance* new_instance_;
+ PartialLoadStoreEliminationHelper* helper_;
+ ArenaBitVector heap_locs_;
+ ScopedArenaVector<HInstruction*> materializations_;
+ const HeapLocationCollector& collector_;
+ const ExecutionSubgraph* subgraph_;
+ };
+
+ ArrayRef<HeapReferenceData> GetHeapRefs() {
+ return ArrayRef<HeapReferenceData>(heap_refs_);
+ }
+
+ bool IsMaterializationBlock(HBasicBlock* blk) const {
+ return blk->GetBlockId() >= first_materialization_block_id_;
+ }
+
+ HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
+ size_t idx = GetMaterializationBlockIndex(entry, pred_num);
+ HBasicBlock* blk = materialization_blocks_[idx];
+ if (blk == nullptr) {
+ blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
+ GetGraph()->AddBlock(blk);
+ LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
+ << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
+ blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
+ materialization_blocks_[idx] = blk;
+ }
+ return blk;
+ }
+
+ HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
+ HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
+ DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
+ << entry->GetPredecessors()[pred_num]->GetBlockId();
+ return out;
+ }
+
+ IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
+ return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
+ GetGraph()->GetBlocks().end());
+ }
+
+ void FixupPartialObjectUsers() {
+ for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
+ // Use the materialized instances to replace original instance
+ ref_data.FixupUses(/*first_pass=*/true);
+ CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
+ << ref_data.OriginalNewInstance()->GetUses() << ", "
+ << ref_data.OriginalNewInstance()->GetEnvUses();
+ }
+ // This can cause new uses to be created due to the creation of phis/pred-get defaults
+ for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
+ // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
+ ref_data.FixupUses(/*first_pass=*/false);
+ CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
+ << ref_data.OriginalNewInstance()->GetUses() << ", "
+ << ref_data.OriginalNewInstance()->GetEnvUses();
+ }
+ }
+
+  // Finds the first block that either is, or dominates, the given block and
+  // is not a materialization block.
+ HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
+ if (LIKELY(!IsMaterializationBlock(blk))) {
+ // Not a materialization block so itself.
+ return blk;
+ } else if (blk->GetNumberOfPredecessors() != 0) {
+ // We're far enough along that the materialization blocks have been
+ // inserted into the graph so no need to go searching.
+ return blk->GetSinglePredecessor();
+ }
+ // Search through the materialization blocks to find where it will be
+ // inserted.
+ for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
+ if (mat == blk) {
+ size_t cur_pred_idx = idx % max_preds_per_block_;
+ HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
+ return entry->GetPredecessors()[cur_pred_idx];
+ }
+ }
+ LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
+ return nullptr;
+ }
+
+ void InsertMaterializationBlocks() {
+ for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
+ if (mat == nullptr) {
+ continue;
+ }
+ size_t cur_pred_idx = idx % max_preds_per_block_;
+ HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
+ HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
+ mat->InsertBetween(pred, entry);
+ LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
+ << pred->GetBlockId() << "->" << entry->GetBlockId();
+ }
+ }
+
+ // Replace any env-uses remaining of the partial singletons with the
+ // appropriate phis and remove the instructions.
+ void RemoveReplacedInstructions() {
+ for (HeapReferenceData& ref_data : GetHeapRefs()) {
+ CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
+ << ref_data.OriginalNewInstance()->GetUses() << ", "
+ << ref_data.OriginalNewInstance()->GetEnvUses()
+ << " inst is: " << ref_data.OriginalNewInstance();
+ const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
+ while (!env_uses.empty()) {
+ const HUseListNode<HEnvironment*>& use = env_uses.front();
+ HInstruction* merged_inst =
+ ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
+ LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
+ << " with " << *merged_inst;
+ use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
+ }
+ ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
+ }
+ }
+
+ // We need to make sure any allocations dominate their environment uses.
+ // Technically we could probably remove the env-uses and be fine but this is easy.
+ void ReorderMaterializationsForEnvDominance() {
+ for (HBasicBlock* blk : IterateMaterializationBlocks()) {
+ ScopedArenaAllocator alloc(alloc_->GetArenaStack());
+ ArenaBitVector still_unsorted(
+ &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
+ // This is guaranteed to be very short (since we will abandon LSE if there
+ // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the
+ // absolute maximum size this list can be) so doing a selection sort is
+ // fine. This avoids the need to do a complicated recursive check to
+ // ensure transitivity for std::sort.
+ ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
+ materializations.reserve(GetHeapRefs().size());
+ for (HInstruction* ins :
+ MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
+ if (ins->IsNewInstance()) {
+ materializations.push_back(ins->AsNewInstance());
+ still_unsorted.SetBit(ins->GetId());
+ }
+ }
+ using Iter = ScopedArenaVector<HNewInstance*>::iterator;
+ Iter unsorted_start = materializations.begin();
+ Iter unsorted_end = materializations.end();
+      // Selection sort. Required since the only check we can easily perform is
+      // an is-before-all-unsorted check.
+ while (unsorted_start != unsorted_end) {
+ bool found_instruction = false;
+ for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
+ HNewInstance* ni = *candidate;
+ if (std::none_of(ni->GetAllEnvironments().cbegin(),
+ ni->GetAllEnvironments().cend(),
+ [&](const HEnvironment* env) {
+ return std::any_of(
+ env->GetEnvInputs().cbegin(),
+ env->GetEnvInputs().cend(),
+ [&](const HInstruction* env_element) {
+ return env_element != nullptr &&
+ still_unsorted.IsBitSet(env_element->GetId());
+ });
+ })) {
+ still_unsorted.ClearBit(ni->GetId());
+ std::swap(*unsorted_start, *candidate);
+ ++unsorted_start;
+ found_instruction = true;
+ break;
+ }
+ }
+ CHECK(found_instruction) << "Unable to select next materialization instruction."
+ << " Environments have a dependency loop!";
+ }
+      // Reverse so that, as we prepend them, we end up with the correct order.
+ auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
+ for (HNewInstance* ins : reverse_iter) {
+ if (blk->GetFirstInstruction() != ins) {
+ // Don't do checks since that makes sure the move is safe WRT
+ // ins->CanBeMoved which for NewInstance is false.
+ ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
+ }
+ }
+ }
+ }
+
+ private:
+ void CollectInterestingHeapRefs() {
+ // Get all the partials we need to move around.
+ for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
+ ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+ if (ri->IsPartialSingleton() &&
+ ri->GetReference()->GetBlock() != nullptr &&
+ ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
+ RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
+ }
+ }
+ }
+
+ void RecordHeapRefField(HNewInstance* ni, size_t loc) {
+ DCHECK(ni != nullptr);
+ // This is likely to be very short so just do a linear search.
+ auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
+ return data.OriginalNewInstance() == ni;
+ });
+ HeapReferenceData& cur_ref =
+ (it == heap_refs_.end())
+ ? heap_refs_.emplace_back(this,
+ ni,
+ lse_->heap_location_collector_.GetHeapLocation(loc)
+ ->GetReferenceInfo()
+ ->GetNoEscapeSubgraph(),
+ alloc_)
+ : *it;
+ cur_ref.AddHeapLocation(loc);
+ }
+
+
+ void NotifyNewMaterialization(HInstruction* ins) {
+ if (ins->IsPhi()) {
+ new_ref_phis_.push_back(ins->AsPhi());
+ }
+ }
+
+ size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
+ DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
+ << "block is a materialization block!";
+ DCHECK_LT(pred_num, max_preds_per_block_);
+ return blk->GetBlockId() * max_preds_per_block_ + pred_num;
+ }
+
+ HGraph* GetGraph() const {
+ return lse_->GetGraph();
+ }
+
+ LSEVisitor* lse_;
+ ScopedArenaAllocator* alloc_;
+ ScopedArenaVector<HInstruction*> new_ref_phis_;
+ ScopedArenaVector<HeapReferenceData> heap_refs_;
+ size_t max_preds_per_block_;
+ // An array of (# of non-materialization blocks) * max_preds_per_block
+  // entries arranged in block-id major order. Since we can have at most one
+ // materialization block on each edge this is the maximum possible number of
+ // materialization blocks.
+ ScopedArenaVector<HBasicBlock*> materialization_blocks_;
+ size_t first_materialization_block_id_;
+
+ friend void LSEVisitor::MovePartialEscapes();
+ friend class HeapReferenceData;
+};
+
+// Work around C++ type checking annoyances with not being able to forward-declare inner types.
+class HeapRefHolder
+ : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};
+
+HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
+ HeapRefHolder&& holder,
+ size_t pred_idx,
+ HBasicBlock* entry) {
+ PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
+ HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
+ HInstruction* new_inst = ref_data.OriginalNewInstance();
+ if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
+ LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
+ << " is null!";
+ return GetGraph()->GetNullConstant();
+ }
+ HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
+ CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
+ HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
+ repl_create->SetPartialMaterialization();
+ bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
+ repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
+ MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
+ LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
+ ref_data.AddMaterialization(bb, repl_create);
+ const FieldInfo* info = nullptr;
+ for (const HeapLocation* loc : ref_data.IterateLocations()) {
+ size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
+ info = field_infos_[loc_off];
+ DCHECK(loc->GetIndex() == nullptr);
+ Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
+ if (value.NeedsLoopPhi()) {
+ Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
+ DCHECK(!repl.IsUnknown());
+ DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
+ << repl << " from " << value << " pred is " << old_pred->GetBlockId();
+ if (!repl.IsInvalid()) {
+ value = repl;
+ } else {
+ FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
+ value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
+ }
+ } else if (value.NeedsNonLoopPhi()) {
+ Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
+ DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
+ << repl << " from " << value << " pred is " << old_pred->GetBlockId();
+ if (!repl.IsInvalid()) {
+ value = repl;
+ } else {
+ MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
+ value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
+ }
+ }
+ DCHECK(value.IsDefault() || value.IsInstruction())
+ << GetGraph()->PrettyMethod() << ": " << value;
+
+ if (!value.IsDefault() &&
+ // shadow$_klass_ doesn't need to be manually initialized.
+ MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
+ CHECK(info != nullptr);
+ HInstruction* set_value =
+ new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
+ value.GetInstruction(),
+ field_infos_[loc_off]->GetField(),
+ loc->GetType(),
+ MemberOffset(loc->GetOffset()),
+ /*is_volatile=*/false,
+ field_infos_[loc_off]->GetFieldIndex(),
+ loc->GetDeclaringClassDefIndex(),
+ field_infos_[loc_off]->GetDexFile(),
+ /*dex_pc=*/0u);
+ bb->InsertInstructionAfter(set_value, repl_create);
+ LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
+ }
+ }
+ return repl_create;
+}
+
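+// Returns the value `read` would observe on the paths where `orig_new_inst`
+// has not (yet) escaped, materializing a Phi from the recorded intermediate
+// values if needed. The result becomes the default value of the predicated
+// load that replaces `read`.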
+HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
+ size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
+ Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
+ LSE_VLOG << "using " << pred << " as default value for " << *read;
+ if (pred.IsInstruction()) {
+ return pred.GetInstruction();
+ } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
+ FullyMaterializePhi(pred.GetPhiPlaceholder(),
+ heap_location_collector_.GetHeapLocation(loc)->GetType());
+ HInstruction* res = Replacement(pred).GetInstruction();
+ LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
+ return res;
+ }
+ LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
+ << "! This should be impossible!";
+ UNREACHABLE();
+}
+
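+// Phase 5 entry point: create materializations for every partial singleton at
+// its escape edges, thread them through the graph with Phis, rewrite users to
+// their predicated forms, and finally remove the original allocations.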
+void LSEVisitor::MovePartialEscapes() {
+ if (!ShouldPerformPartialLSE()) {
+ return;
+ }
+
+ ScopedArenaAllocator saa(allocator_.GetArenaStack());
+ PartialLoadStoreEliminationHelper helper(this, &saa);
+
+ // Since we now have more information for PHIs (we know the object hasn't
+ // escaped), we need to clear the old phi-replacements where we weren't
+ // able to find the value.
+ PrepareForPartialPhiComputation();
+
+ for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
+ LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
+ // Set up entry and exit blocks.
+ for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
+ // Set up materialization blocks.
+ for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
+ // Set up entries.
+ // TODO: Assuming we correctly break critical edges, every entry block
+ // must have only a single predecessor, so we could just put all this
+ // stuff in there. OTOH the simplifier can do it for us and this approach
+ // is simpler to implement - giving a clean separation between the
+ // original graph and the materialization blocks - so for now we might as
+ // well have these new blocks.
+ ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
+ ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
+ pred_vals.reserve(entry->GetNumberOfPredecessors());
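+ // For each predecessor, either reuse the materialization we already made
+ // (when the predecessor is itself a cohort entry) or create a fresh
+ // materialization block on that edge.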
+ for (const auto& [pred, pred_idx] :
+ ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
+ DCHECK(!helper.IsMaterializationBlock(pred));
+ if (excluded_cohort.IsEntryBlock(pred)) {
+ pred_vals.push_back(ref_data.GetMaterialization(pred));
+ continue;
+ } else {
+ pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
+ }
+ }
+ ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
+ }
+
+ // Set up exit block heap-values for later phi-generation.
+ for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
+ // Mark every exit of the cohorts as having a value so we can easily
+ // materialize the PHIs.
+ // TODO: By setting this we can easily use the normal MaterializeLoopPhis
+ // (via FullyMaterializePhis) to generate the default values for
+ // predicated-gets. This has the unfortunate side effect of creating
+ // somewhat more phis than are really needed (in some cases). We should
+ // eventually try to lower these PHIs to just the non-escaping value in
+ // cases where that is possible. Currently this is done to some extent in
+ // instruction_simplifier, but we have more information here to do the
+ // right thing.
+ for (const HeapLocation* loc : ref_data.IterateLocations()) {
+ size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
+ // This Value::Default() is only used to fill in PHIs used as the
+ // default value for PredicatedInstanceFieldGets. The actual value
+ // stored there is meaningless since the Predicated-iget will use the
+ // actual field value instead on these paths.
+ heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
+ }
+ }
+ }
+
+ // String the materialization through the graph.
+ // Visit RPO to PHI the materialized object through the cohort.
+ for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
+ // NB This doesn't include materialization blocks.
+ DCHECK(!helper.IsMaterializationBlock(blk))
+ << "Materialization blocks should not be in RPO yet.";
+ if (ref_data.HasMaterialization(blk)) {
+ continue;
+ } else if (ref_data.BeforeAllEscapes(blk)) {
+ ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
+ continue;
+ } else {
+ ref_data.GenerateMaterializationValueFromPredecessors(blk);
+ }
+ }
+ }
+
+ // Once we've generated all the materializations we can update the users.
+ helper.FixupPartialObjectUsers();
+
+ // Actually put the materialization blocks into the graph.
+ helper.InsertMaterializationBlocks();
+
+ // Get rid of the original instructions.
+ helper.RemoveReplacedInstructions();
+
+ // Ensure everything is ordered correctly in the materialization blocks.
+ // This involves moving every NewInstance to the top and arranging them so
+ // that any required env-uses are dominated by their definitions.
+ helper.ReorderMaterializationsForEnvDominance();
+}
+
+void LSEVisitor::FinishFullLSE() {
// Remove recorded load instructions that should be eliminated.
for (const LoadStoreRecord& record : loads_and_stores_) {
size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
@@ -2505,7 +3758,7 @@
}
}
-bool LoadStoreElimination::Run() {
+bool LoadStoreElimination::Run(bool enable_partial_lse) {
if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
// Debugger may set heap values or trigger deoptimization of callers.
// Try/catch support not implemented yet.
@@ -2519,7 +3772,11 @@
// O(1) though.
graph_->ComputeReachabilityInformation();
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, stats_, &allocator, /*for_elimination=*/true);
+ LoadStoreAnalysis lsa(graph_,
+ stats_,
+ &allocator,
+ enable_partial_lse ? LoadStoreAnalysisType::kFull
+ : LoadStoreAnalysisType::kNoPredicatedInstructions);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
@@ -2527,9 +3784,11 @@
return false;
}
- LSEVisitor lse_visitor(graph_, heap_location_collector, stats_);
+ LSEVisitor lse_visitor(graph_, heap_location_collector, enable_partial_lse, stats_);
lse_visitor.Run();
return true;
}
+#undef LSE_VLOG
+
} // namespace art