/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "escape.h"
#include "execution_subgraph.h"
#include "load_store_analysis.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 *  - In phase 1, we visit basic blocks in reverse post order and for each basic
 *    block, visit instructions sequentially, recording heap values and looking
 *    for loads and stores to eliminate without relying on loop Phis.
 *  - In phase 2, we look for loads that can be replaced by creating loop Phis
 *    or using a loop-invariant value.
 *  - In phase 3, we determine which stores are dead and can be eliminated and
 *    based on that information we re-evaluate whether some kept stores are
 *    storing the same value as the value in the heap location; such stores are
 *    also marked for elimination.
 *  - In phase 4, we commit the changes, replacing loads marked for elimination
 *    in previous processing and removing stores not marked for keeping. We also
 *    remove allocations that are no longer needed.
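 *
 * As a minimal sketch of these phases (illustrative Java-like pseudocode, not
 * code taken from this compiler), consider:
 *     Point p = new Point();  // Non-escaping singleton allocation.
 *     p.x = 1;                // Dead store, detected in phase 3.
 *     p.x = 2;                // Store not marked for keeping, removed in phase 4.
 *     return p.x;             // Load recorded in phase 1, replaced by 2 in phase 4.
 * After phase 4 the method body reduces to `return 2;` and the now-unused
 * allocation itself is removed.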
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 *  - For a loop header of an irreducible loop, all heap values are unknown.
 *  - For a loop header of a normal loop, all values unknown at the end of the
 *    preheader are initialized to unknown, other heap values are set to Phi
 *    placeholders as we cannot determine yet whether these values are known on
 *    all back-edges. We use Phi placeholders also for array heap locations with
 *    index defined inside the loop but this helps only when the value remains
 *    zero from the array allocation throughout the loop.
 *  - For other basic blocks, we merge incoming values from the end of all
 *    predecessors. If any incoming value is unknown, the start value for this
 *    block is also unknown. Otherwise, if all the incoming values are the same
 *    (including the case of a single predecessor), the incoming value is used.
 *    Otherwise, we use a Phi placeholder to indicate different incoming values.
 *    We record whether such a Phi placeholder depends on a loop Phi placeholder.
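 *
 * For instance (an illustrative sketch only), in a diamond such as
 *     if (cond) { obj.f = 1; } else { obj.f = 2; }
 *     int x = obj.f;
 * the merge block records a Phi placeholder for the heap location of `obj.f`
 * and the load of `x` can later be replaced by materializing Phi(1, 2).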
 *
 * For each instruction in the block
 *  - If the instruction is a load from a heap location with a known value not
 *    dependent on a loop Phi placeholder, the load can be eliminated, either by
 *    using an existing instruction or by creating new Phi(s) instead. In order
 *    to maintain the validity of all heap locations during the optimization
 *    phase, we only record substitutes at this phase and the real elimination
 *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *    replacement are recorded for processing later.
 *  - If the instruction is a store, it updates the heap value for the heap
 *    location with the stored value and records the store itself so that we can
 *    mark it for keeping if the value becomes observable. Heap values are
 *    invalidated for heap locations that may alias with the store instruction's
 *    heap location and their recorded stores are marked for keeping as they are
 *    now potentially observable. The store instruction can be eliminated unless
 *    the value stored is later needed, e.g. by a load from the same/aliased heap
 *    location, or the heap location persists at method return/deoptimization.
 *  - A store that stores the same value as the heap value is eliminated (see
 *    the sketch after this list).
 *  - For newly instantiated instances, their heap values are initialized to
 *    language defined default values.
 *  - Finalizable objects are considered as persisting at method
 *    return/deoptimization.
 *  - Some instructions such as invokes are treated as loading and invalidating
 *    all the heap values, depending on the instruction's side effects.
 *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *    partial overlap access among ArrayGet/ArraySet/VecLoad/VecStore is treated
 *    as aliasing and no load/store is eliminated in such cases.
 *  - Currently this LSE algorithm doesn't handle graphs with try-catch, due to
 *    the special block merging structure.
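 *
 * A small sketch of the same-value rule mentioned above:
 *     obj.f = x;
 *     obj.f = x;   // Stores the same value as the tracked heap value; eliminated.
 *     call();      // The invoke invalidates tracked values; the prior store is kept.
 *     obj.f = x;   // No longer known to be redundant.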
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
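 *
 * As an illustrative sketch of the loop case:
 *     a[0] = 1;
 *     while (cond) {
 *       int x = a[0];  // Depends on a loop Phi placeholder.
 *       a[0] = 2;      // The back-edge value differs from the pre-header value.
 *     }
 * Here the load is replaced by materializing a loop Phi(1, 2); if the loop
 * did not modify a[0], the pre-header value 1 would be used directly.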
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit of the number of such iterations, the total time complexity is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimate
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
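 *
 * For example (a sketch only), phase 3 can eliminate the last store in
 *     obj.f = 1;   // Not observable, already dropped from tracking.
 *     obj.f = 2;
 *     obj.f = 2;   // The old value (2) matches the stored value; eliminated.
 * even though the old value had to be recalculated after ignoring the
 * non-observable store of 1.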
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between the
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of object, called a singleton, is instantiated in the method
 * and has a single name, i.e. no aliases. Singletons have exclusive heap
 * locations since they have no aliases. Singletons are helpful in narrowing
 * down the life span of a heap location such that they do not always need to
 * participate in merging heap values. Allocation of a singleton can be
 * eliminated if that singleton is not used and does not persist at method
 * return/deoptimization.
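 *
 * Continuing the earlier sketch, this is the phase where the allocation
 * `new Point()` is removed: its only uses were the eliminated loads and
 * stores, it is a singleton, and it does not persist at method
 * return/deoptimization.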
 *
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexity described above assumes that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art {

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    PhiPlaceholder(uint32_t block_id, uint32_t heap_location)
        : block_id_(block_id),
          heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    uint32_t GetBlockId() const {
      return block_id_;
    }

    size_t GetHeapLocation() const {
      return heap_location_;
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  class Value {
   public:
    enum class Type {
      kInvalid,
      kUnknown,
      kMergedUnknown,
      kDefault,
      kInstruction,
      kNeedsNonLoopPhi,
      kNeedsLoopPhi,
    };

    static Value Invalid() {
      Value value;
      value.type_ = Type::kInvalid;
      value.instruction_ = nullptr;
      return value;
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static Value Unknown() {
      Value value;
      value.type_ = Type::kUnknown;
      value.instruction_ = nullptr;
      return value;
    }

    static Value MergedUnknown(const PhiPlaceholder* phi_placeholder) {
      Value value;
      value.type_ = Type::kMergedUnknown;
      value.phi_placeholder_ = phi_placeholder;
      return value;
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static Value Default() {
      Value value;
      value.type_ = Type::kDefault;
      value.instruction_ = nullptr;
      return value;
    }

    static Value ForInstruction(HInstruction* instruction) {
      Value value;
      value.type_ = Type::kInstruction;
      value.instruction_ = instruction;
      return value;
    }

    static Value ForNonLoopPhiPlaceholder(const PhiPlaceholder* phi_placeholder) {
      Value value;
      value.type_ = Type::kNeedsNonLoopPhi;
      value.phi_placeholder_ = phi_placeholder;
      return value;
    }

    static Value ForLoopPhiPlaceholder(const PhiPlaceholder* phi_placeholder) {
      Value value;
      value.type_ = Type::kNeedsLoopPhi;
      value.phi_placeholder_ = phi_placeholder;
      return value;
    }

    static Value ForPhiPlaceholder(const PhiPlaceholder* phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    bool IsValid() const {
      return !IsInvalid();
    }

    bool IsInvalid() const {
      return type_ == Type::kInvalid;
    }

    bool IsMergedUnknown() const {
      return type_ == Type::kMergedUnknown;
    }

    bool IsPureUnknown() const {
      return type_ == Type::kUnknown;
    }

    bool IsUnknown() const {
      return type_ == Type::kUnknown || type_ == Type::kMergedUnknown;
    }

    bool IsDefault() const {
      return type_ == Type::kDefault;
    }

    bool IsInstruction() const {
      return type_ == Type::kInstruction;
    }

    bool NeedsNonLoopPhi() const {
      return type_ == Type::kNeedsNonLoopPhi;
    }

    bool NeedsLoopPhi() const {
      return type_ == Type::kNeedsLoopPhi;
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction());
      return instruction_;
    }

    const PhiPlaceholder* GetPhiPlaceholder() const {
      DCHECK(NeedsPhi() || IsMergedUnknown());
      return phi_placeholder_;
    }

    uint32_t GetMergeBlockId() const {
      DCHECK(IsMergedUnknown()) << this;
      return phi_placeholder_->GetBlockId();
    }

    HBasicBlock* GetMergeBlock(const HGraph* graph) const {
      DCHECK(IsMergedUnknown()) << this;
      return graph->GetBlocks()[GetMergeBlockId()];
    }

    size_t GetHeapLocation() const {
      DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
      return phi_placeholder_->GetHeapLocation();
    }

    bool Equals(Value other) const {
      // Only valid values can be compared.
      DCHECK(IsValid());
      DCHECK(other.IsValid());
      if (type_ != other.type_) {
        // Default values are equal to zero bit pattern instructions.
        return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
               (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
      } else {
        // Note: Two unknown values are considered different.
        return IsDefault() || (IsInstruction() && GetInstruction() == other.GetInstruction()) ||
               (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder()) ||
               (IsMergedUnknown() && GetPhiPlaceholder() == other.GetPhiPlaceholder());
      }
    }

    bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

   private:
    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    Type type_;
    union {
      HInstruction* instruction_;
      const PhiPlaceholder* phi_placeholder_;
    };
  };

  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
  size_t PhiPlaceholderIndex(const PhiPlaceholder* phi_placeholder) const {
    return static_cast<size_t>(phi_placeholder - phi_placeholders_.data());
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
    auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
    auto* sg = ri->GetNoEscapeSubgraph();
    return ri->IsPartialSingleton() &&
           std::none_of(sg->GetExcludedCohorts().cbegin(),
                        sg->GetExcludedCohorts().cend(),
                        [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
                          // Make sure we haven't yet and never will escape.
                          return ex.PrecedesBlock(blk) ||
                                 ex.ContainsBlock(blk) ||
                                 ex.SucceedsBlock(blk);
                        });
  }

  const PhiPlaceholder* GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    size_t phi_placeholders_begin = phi_placeholders_begin_for_block_[block_id];
    const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholders_begin + idx];
    DCHECK_EQ(phi_placeholder->GetBlockId(), block_id);
    DCHECK_EQ(phi_placeholder->GetHeapLocation(), idx);
    return phi_placeholder;
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi());
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK(!value.IsInstruction() ||
             FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  static ScopedArenaVector<PhiPlaceholder> CreatePhiPlaceholders(
      HGraph* graph,
      const HeapLocationCollector& heap_location_collector,
      ScopedArenaAllocator* allocator);
  static ScopedArenaVector<size_t> CreatePhiPlaceholdersBeginForBlock(
      HGraph* graph,
      const HeapLocationCollector& heap_location_collector,
      ScopedArenaAllocator* allocator);

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };

  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // Should never add type conversion into boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
        expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      DCHECK(!IsLoad(instruction));  // New Phi (may not be in the graph yet) or default value.
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations it is better to avoid creating
    // them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsPureUnknown()) {
      return;
    }
    if (value.IsMergedUnknown()) {
      kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with heap location at `loc_index`
  // and heap_values of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(
            !heap_values[i].stored_by.IsInstruction() ||
            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  // `instruction` is being removed. Try to see if the null check on it
  // can be removed. This can happen if the same value is set in two branches
  // but not in dominators. Such as:
  //   int[] a = foo();
  //   if () {
  //     a[0] = 2;
  //   } else {
  //     a[0] = 2;
  //   }
  //   // a[0] can now be replaced with constant 2, and the null check on it can be removed.
  void TryRemovingNullCheck(HInstruction* instruction) {
    HInstruction* prev = instruction->GetPrevious();
    if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
      // Previous instruction is a null check for this instruction. Remove the null check.
      prev->ReplaceWith(prev->InputAt(0));
      prev->GetBlock()->RemoveInstruction(prev);
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //   packed_data = [0xA, 0xB, 0xC, 0xD];              <-- Different values in each lane.
    //   VecStore array[i, i+1, i+2, i+3] = packed_data;
    //   VecStore array[i+1, i+2, i+3, i+4] = packed_data;  <-- We are here (partial overlap).
    //   VecLoad vx = array[i, i+1, i+2, i+3];            <-- Cannot be eliminated because the
    //                                                        value here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to partial overlap of two vector accesses,
    //     e.g. a[i..i+3] and a[i+1..i+4].
    // (2) May alias because two vector accesses may completely overlap each other,
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown,
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only to case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(const PhiPlaceholder* phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination
  };

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      const PhiPlaceholder* phi_placeholder,
      DataType::Type type,
      /*inout*/ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      const PhiPlaceholder* phi_placeholder,
      /*inout*/ArenaBitVector* phi_placeholders_to_materialize);
  const PhiPlaceholder* FindLoopPhisToMaterialize(
      const PhiPlaceholder* phi_placeholder,
      /*out*/ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type,
                           Phase phase);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type,
                           Phase phase);
  const PhiPlaceholder* TryToMaterializeLoopPhis(const PhiPlaceholder* phi_placeholder,
                                                 HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(const PhiPlaceholder* loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(const PhiPlaceholder* phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    HInstruction* cls = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    HInstruction* cls = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        if (!reference->IsNewInstance() || !reference->AsNewInstance()->IsFinalizable()) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!ref_info->IsSingletonAndRemovable() &&
          !(ref_info->IsPartialSingleton() && IsPartialNoEscape(block, i))) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleExit(throw_instruction->GetBlock());
  }

  void HandleInvoke(HInstruction* instruction) {
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
          ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
      HBasicBlock* blk = instruction->GetBlock();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if either we (1) never escape or (2) sometimes escape but
      // there is no possible execution where we have done so at this time. NB
      // We count being in the excluded cohort as escaping. Technically, this is
      // a bit over-conservative (since we can have multiple non-escaping calls
      // before a single escaping one) but this simplifies everything greatly.
      if (ref_info->IsSingleton() ||
          // Partial and we aren't currently escaping and we haven't escaped yet.
          (ref_info->IsPartialSingleton() && ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk) &&
           std::none_of(cohorts.begin(),
                        cohorts.end(),
                        [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
                          return cohort.PrecedesBlock(blk);
                        }))) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::Unknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::Unknown();
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[new_instance->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HInstruction* ref =
          heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance) {
        if (offset >= mirror::kObjectHeaderSize) {
          // Instance fields except the header fields are set to default heap values.
          heap_values[i].value = Value::Default();
          heap_values[i].stored_by = Value::Unknown();
        } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
          // The shadow$_klass_ field is special and has an actual value however.
          heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
          heap_values[i].stored_by = Value::Unknown();
        }
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[new_array->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      HInstruction* ref = location->GetReferenceInfo()->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // Phi placeholders used for keeping track of values and stores for multiple predecessors.
  ScopedArenaVector<PhiPlaceholder> phi_placeholders_;

  // The start of the Phi placeholders in the `phi_placeholders_`
  // for each block with multiple predecessors.
  ScopedArenaVector<size_t> phi_placeholders_begin_for_block_;

  // One array of heap value records for each block.
  ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;

  // We record loads and stores for re-processing when we find a loop Phi placeholder
  // with unknown value from a predecessor, and also for removing stores that are
  // found to be dead, i.e. not marked in `kept_stores_` at the end.
  struct LoadStoreRecord {
    HInstruction* load_or_store;
    size_t heap_location_index;
  };
  ScopedArenaVector<LoadStoreRecord> loads_and_stores_;

  // We record the substitute instructions for loads that should be
  // eliminated but may be used by heap locations. They'll be removed
  // in the end. These are indexed by the load's id.
  ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;

  // Record stores to keep in a bit vector indexed by instruction ID.
  ArenaBitVector kept_stores_;
  // When we need to keep all stores that feed a Phi placeholder, we just record the
  // index of that placeholder for processing after graph traversal.
  ArenaBitVector phi_placeholders_to_search_for_kept_stores_;

  // Loads that would require a loop Phi to replace are recorded for processing
  // later as we do not have enough information from back-edges to determine if
  // a suitable Phi can be found or created when we visit these loads.
  ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;

  // For stores, record the old value records that were replaced and the stored values.
  struct StoreRecord {
    ValueRecord old_value_record;
    HInstruction* stored_value;
  };
  ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;

  // Replacements for Phi placeholders.
  // The unknown heap value is used to mark Phi placeholders that cannot be replaced.
  ScopedArenaVector<Value> phi_placeholder_replacements_;

  // Merged-unknowns that must have their predecessor values kept to ensure
  // partially escaped values are written.
  ArenaBitVector kept_merged_unknowns_;

  ScopedArenaVector<HInstruction*> singleton_new_instances_;

  friend std::ostream& operator<<(std::ostream& os, const Value& v);

  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
1050
Alex Light86fe9b82020-11-16 16:54:01 +00001051std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
1052 switch (type_) {
1053 case Type::kDefault:
1054 return os << "Default";
1055 case Type::kInstruction:
1056 return os << "Instruction[id: " << instruction_->GetId()
1057 << ", block: " << instruction_->GetBlock()->GetBlockId() << "]";
1058 case Type::kUnknown:
1059 return os << "Unknown";
1060 case Type::kInvalid:
1061 return os << "Invalid";
1062 case Type::kMergedUnknown:
1063 return os << "MergedUnknown[block: " << phi_placeholder_->GetBlockId()
1064 << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
1065 case Type::kNeedsLoopPhi:
1066 return os << "NeedsLoopPhi[block: " << phi_placeholder_->GetBlockId()
1067 << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
1068 case Type::kNeedsNonLoopPhi:
1069 return os << "NeedsNonLoopPhi[block: " << phi_placeholder_->GetBlockId()
1070 << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
1071 }
1072}
1073
1074std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
1075 return v.Dump(os);
1076}
1077
Vladimir Marko3224f382020-06-23 14:19:53 +01001078ScopedArenaVector<LSEVisitor::PhiPlaceholder> LSEVisitor::CreatePhiPlaceholders(
1079 HGraph* graph,
1080 const HeapLocationCollector& heap_location_collector,
1081 ScopedArenaAllocator* allocator) {
1082 size_t num_phi_placeholders = 0u;
1083 size_t num_heap_locations = heap_location_collector.GetNumberOfHeapLocations();
1084 for (HBasicBlock* block : graph->GetReversePostOrder()) {
1085 if (block->GetPredecessors().size() >= 2u) {
1086 num_phi_placeholders += num_heap_locations;
1087 }
1088 }
1089 ScopedArenaVector<PhiPlaceholder> phi_placeholders(allocator->Adapter(kArenaAllocLSE));
1090 phi_placeholders.reserve(num_phi_placeholders);
1091 for (HBasicBlock* block : graph->GetReversePostOrder()) {
1092 if (block->GetPredecessors().size() >= 2u) {
1093 // Create Phi placeholders referencing the block by the block ID.
1094 DCHECK_LE(num_heap_locations, phi_placeholders.capacity() - phi_placeholders.size());
1095 uint32_t block_id = block->GetBlockId();
1096 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
1097 phi_placeholders.push_back(PhiPlaceholder(block_id, idx));
1098 }
1099 }
1100 }
1101 return phi_placeholders;
1102}
1103
1104ScopedArenaVector<size_t> LSEVisitor::CreatePhiPlaceholdersBeginForBlock(
1105 HGraph* graph,
1106 const HeapLocationCollector& heap_location_collector,
1107 ScopedArenaAllocator* allocator) {
1108 size_t num_phi_placeholders = 0u;
1109 size_t num_heap_locations = heap_location_collector.GetNumberOfHeapLocations();
1110 ScopedArenaVector<size_t> phi_placeholders_begin_for_block(graph->GetBlocks().size(),
1111 allocator->Adapter(kArenaAllocLSE));
1112 for (HBasicBlock* block : graph->GetReversePostOrder()) {
1113 if (block->GetPredecessors().size() >= 2u) {
1114 phi_placeholders_begin_for_block[block->GetBlockId()] = num_phi_placeholders;
1115 num_phi_placeholders += num_heap_locations;
1116 }
1117 }
1118 return phi_placeholders_begin_for_block;
1119}
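// Taken together, the two helpers above define a flat indexing scheme: the
// placeholder for (block B, heap location L) lives at index
// `phi_placeholders_begin_for_block_[B] + L`, so each block with multiple
// predecessors owns one contiguous slice of `phi_placeholders_` with
// `GetNumberOfHeapLocations()` entries.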
1120
1121LSEVisitor::LSEVisitor(HGraph* graph,
1122 const HeapLocationCollector& heap_location_collector,
1123 OptimizingCompilerStats* stats)
1124 : HGraphDelegateVisitor(graph, stats),
1125 heap_location_collector_(heap_location_collector),
1126 allocator_(graph->GetArenaStack()),
1127 phi_placeholders_(CreatePhiPlaceholders(graph, heap_location_collector, &allocator_)),
1128 phi_placeholders_begin_for_block_(
1129 CreatePhiPlaceholdersBeginForBlock(graph, heap_location_collector, &allocator_)),
1130 heap_values_for_(graph->GetBlocks().size(),
1131 ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
1132 allocator_.Adapter(kArenaAllocLSE)),
Vladimir Marko3224f382020-06-23 14:19:53 +01001133 loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
Vladimir Marko9e3fe992020-08-25 16:17:51 +01001134 // We may add new instructions (default values, Phis) but we're not adding loads
1135 // or stores, so we shall not need to resize following vector and BitVector.
1136 substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
1137 nullptr,
1138 allocator_.Adapter(kArenaAllocLSE)),
Vladimir Marko3224f382020-06-23 14:19:53 +01001139 kept_stores_(&allocator_,
1140 /*start_bits=*/ graph->GetCurrentInstructionId(),
1141 /*expandable=*/ false,
1142 kArenaAllocLSE),
1143 phi_placeholders_to_search_for_kept_stores_(&allocator_,
1144 phi_placeholders_.size(),
1145 /*expandable=*/ false,
1146 kArenaAllocLSE),
1147 loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
1148 store_records_(allocator_.Adapter(kArenaAllocLSE)),
1149 phi_placeholder_replacements_(phi_placeholders_.size(),
1150 Value::Invalid(),
1151 allocator_.Adapter(kArenaAllocLSE)),
Alex Light86fe9b82020-11-16 16:54:01 +00001152 kept_merged_unknowns_(&allocator_,
1153 /*start_bits=*/ phi_placeholders_.size(),
1154 /*expandable=*/ false,
1155 kArenaAllocLSE),
Vladimir Marko3224f382020-06-23 14:19:53 +01001156 singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
1157 // Clear bit vectors.
1158 phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
1159 kept_stores_.ClearAllBits();
1160}
1161
1162LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
1163 // If the pre-header value is known (which implies that the reference dominates this
1164 // block), use a Phi placeholder for the value in the loop header. If all predecessors
1165 // are later found to have a known value, we can replace loads from this location,
1166 // either with the pre-header value or with a new Phi. For array locations, the index
1167 // may be defined inside the loop but the only known value in that case should be the
1168 // default value or a Phi placeholder that can be replaced only with the default value.
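  // A minimal sketch of the intended effect (illustrative pseudo-code, not actual IR):
  //
  //   obj.f = v;          // pre-header: heap value for `obj.f` is `v`
  //   while (cond) {
  //     use(obj.f);       // sees the loop Phi placeholder for this location
  //     obj.f = w;        // if the back-edge value `w` becomes known, the
  //   }                   // load above may later read Phi(v, w)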
1169 HLoopInformation* loop_info = block->GetLoopInformation();
1170 uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
1171 Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
1172 if (pre_header_value.IsUnknown()) {
Alex Light86fe9b82020-11-16 16:54:01 +00001173 return pre_header_value;
Vladimir Marko3224f382020-06-23 14:19:53 +01001174 }
1175 if (kIsDebugBuild) {
1176 // Check that the reference indeed dominates this loop.
1177 HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
1178 HInstruction* ref = location->GetReferenceInfo()->GetReference();
Alex Light86fe9b82020-11-16 16:54:01 +00001179 CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
1180 << GetGraph()->PrettyMethod();
Vladimir Marko3224f382020-06-23 14:19:53 +01001181 // Check that the index, if defined inside the loop, tracks a default value
1182 // or a Phi placeholder requiring a loop Phi.
1183 HInstruction* index = location->GetIndex();
1184 if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
Alex Light86fe9b82020-11-16 16:54:01 +00001185 CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
1186 << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
1187 << pre_header_value;
Vladimir Marko3224f382020-06-23 14:19:53 +01001188 }
1189 }
1190 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1191 return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
1192}
1193
1194LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
1195 // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
1196 // if the value in the location escapes. This is not applicable to singletons that are
1197 // defined inside the loop as they shall be dead in the loop header.
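  // For example (illustrative pseudo-code):
  //
  //   while (cond) {
  //     A a = new A();    // singleton defined inside the loop
  //     a.f = 1;          // cannot be observed at the next loop header,
  //   }                   // so there is no store to track across the back-edge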
1198 ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
1199 if (ref_info->IsSingleton() &&
1200 block->GetLoopInformation()->Contains(*ref_info->GetReference()->GetBlock())) {
1201 return Value::Unknown();
1202 }
1203 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1204 return Value::ForLoopPhiPlaceholder(phi_placeholder);
1205}
1206
1207void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1208 DCHECK(block->IsLoopHeader());
1209 uint32_t block_id = block->GetBlockId();
1210 HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1211 ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1212 heap_values_for_[pre_header->GetBlockId()];
1213 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1214 DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1215 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1216 DCHECK(heap_values.empty());
1217
1218 // Don't eliminate loads in irreducible loops.
1219 if (block->GetLoopInformation()->IsIrreducible()) {
1220 heap_values.resize(num_heap_locations,
1221 { /*value=*/ Value::Unknown(), /*stored_by=*/ Value::Unknown() });
1222 // Also keep the stores before the loop header, including in blocks that were not visited yet.
1223 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1224 KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1225 }
1226 return;
1227 }
1228
1229 // Fill `heap_values` based on values from pre-header.
1230 heap_values.reserve(num_heap_locations);
1231 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1232 heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1233 }
1234}
1235
1236LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1237 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1238 DCHECK(!predecessors.empty());
1239 Value merged_value =
1240 ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1241 for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
Vladimir Marko3224f382020-06-23 14:19:53 +01001242 Value pred_value =
1243 ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
Alex Light86fe9b82020-11-16 16:54:01 +00001244 if (pred_value.Equals(merged_value)) {
1245 // Value is the same. No need to update our merged value.
1246 continue;
1247 } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
1248 // One value is unknown and the other differs, so the merge is a MergedUnknown.
1249 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1250 merged_value = Value::MergedUnknown(phi_placeholder);
1251 // We know that at least one of the merge points is unknown (and both are
1252 // not pure-unknowns since that's captured above). This means that the
1253 // overall value needs to be a MergedUnknown. Just return that.
1254 break;
1255 } else {
Vladimir Marko3224f382020-06-23 14:19:53 +01001256 // There are conflicting known values. We may still be able to replace loads with a Phi.
1257 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1258 // Propagate the need for a new loop Phi from all predecessors.
1259 bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1260 merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1261 }
1262 }
Alex Light86fe9b82020-11-16 16:54:01 +00001263 DCHECK(!merged_value.IsPureUnknown() || block->GetPredecessors().size() <= 1)
1264 << merged_value << " in " << GetGraph()->PrettyMethod();
Vladimir Marko3224f382020-06-23 14:19:53 +01001265 return merged_value;
1266}
1267
1268void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1269 if (block->IsExitBlock()) {
1270 // Exit block doesn't really merge values since the control flow ends in
1271 // its predecessors. Each predecessor needs to make sure stores are kept
1272 // if necessary.
1273 return;
1274 }
1275
1276 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1277 DCHECK(heap_values.empty());
1278 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1279 if (block->GetPredecessors().empty()) {
1280 DCHECK(block->IsEntryBlock());
1281 heap_values.resize(num_heap_locations,
1282 { /*value=*/ Value::Unknown(), /*stored_by=*/ Value::Unknown() });
1283 return;
1284 }
1285
1286 heap_values.reserve(num_heap_locations);
1287 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1288 Value merged_value = MergePredecessorValues(block, idx);
1289 if (kIsDebugBuild) {
1290 if (merged_value.NeedsPhi()) {
1291 uint32_t block_id = merged_value.GetPhiPlaceholder()->GetBlockId();
1292 CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1293 } else if (merged_value.IsInstruction()) {
1294 CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1295 }
1296 }
1297 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1298 Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
Vladimir Markocbeedc82020-08-25 14:31:10 +01001299 for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1300 uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1301 Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1302 if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1303 !merged_stored_by.Equals(stored_by)) {
1304 // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1305 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1306 merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1307 break;
1308 }
Vladimir Marko3224f382020-06-23 14:19:53 +01001309 }
1310 heap_values.push_back({ merged_value, merged_stored_by });
1311 }
1312}
1313
1314static HInstruction* FindOrConstructNonLoopPhi(
1315 HBasicBlock* block,
1316 const ScopedArenaVector<HInstruction*>& phi_inputs,
1317 DataType::Type type) {
1318 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1319 HInstruction* phi = phi_it.Current();
1320 DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1321 auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1322 return lhs == rhs.GetInstruction();
1323 };
1324 if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1325 return phi;
1326 }
1327 }
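  // No existing Phi matches these inputs; construct a new one. Reusing an
  // existing Phi above is a GVN-style deduplication: two eliminated loads that
  // merge the same inputs in this block should resolve to the same Phi.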
1328 ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1329 HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1330 for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1331 DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1332 phi->SetRawInputAt(i, phi_inputs[i]);
1333 }
1334 block->AddPhi(phi);
1335 if (type == DataType::Type::kReference) {
1336 // Update reference type information. Pass invalid handles, these are not used for Phis.
1337 ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1338 Handle<mirror::ClassLoader>(),
1339 Handle<mirror::DexCache>(),
1340 /* is_first_run= */ false);
1341 rtp_fixup.Visit(phi);
1342 }
1343 return phi;
1344}
1345
1346void LSEVisitor::MaterializeNonLoopPhis(const PhiPlaceholder* phi_placeholder,
1347 DataType::Type type) {
1348 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1349 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1350 size_t idx = phi_placeholder->GetHeapLocation();
1351
1352 // Use local allocator to reduce peak memory usage.
1353 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1354 // Reuse the same vector for collecting phi inputs.
1355 ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1356
1357 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1358 work_queue.push_back(phi_placeholder);
1359 while (!work_queue.empty()) {
1360 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1361 if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1362 // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1363 // that directly or indirectly depends on it, so it was already processed as part of the
1364 // other Phi placeholder's dependencies before this one got back to the top of the stack.
1365 work_queue.pop_back();
1366 continue;
1367 }
1368 uint32_t current_block_id = current_phi_placeholder->GetBlockId();
1369 HBasicBlock* current_block = blocks[current_block_id];
1370 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1371
1372 // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1373 // And the only way for such a merged value to reach a different heap location is through
1374 // a load, at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1375 // seen here are tied to one heap location.
1376 DCHECK(!current_block->IsLoopHeader());
1377 DCHECK_EQ(current_phi_placeholder->GetHeapLocation(), idx);
1378
1379 phi_inputs.clear();
1380 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1381 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
Alex Light86fe9b82020-11-16 16:54:01 +00001382 DCHECK(!pred_value.IsUnknown())
1383 << "block " << current_block->GetBlockId() << " pred: " << predecessor->GetBlockId();
Vladimir Marko3224f382020-06-23 14:19:53 +01001384 if (pred_value.NeedsNonLoopPhi()) {
1385 // We need to process the Phi placeholder first.
1386 work_queue.push_back(pred_value.GetPhiPlaceholder());
1387 } else if (pred_value.IsDefault()) {
1388 phi_inputs.push_back(GetDefaultValue(type));
1389 } else {
1390 phi_inputs.push_back(pred_value.GetInstruction());
1391 }
1392 }
1393 if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1394 // All inputs are available. Find or construct the Phi replacement.
1395 phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1396 Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1397 // Remove the block from the queue.
1398 DCHECK_EQ(current_phi_placeholder, work_queue.back());
1399 work_queue.pop_back();
1400 }
1401 }
1402}
1403
1404void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1405 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1406 uint32_t block_id = instruction->GetBlock()->GetBlockId();
1407 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1408 ValueRecord& record = heap_values[idx];
1409 DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1410 loads_and_stores_.push_back({ instruction, idx });
1411 if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1412 !IsDefaultOrPhiAllowedForLoad(instruction)) {
1413 record.value = Value::Unknown();
1414 }
1415 if (record.value.IsDefault()) {
Vladimir Markocbeedc82020-08-25 14:31:10 +01001416 KeepStores(record.stored_by);
Vladimir Marko3224f382020-06-23 14:19:53 +01001417 HInstruction* constant = GetDefaultValue(instruction->GetType());
1418 AddRemovedLoad(instruction, constant);
1419 record.value = Value::ForInstruction(constant);
1420 } else if (record.value.IsUnknown()) {
1421 // Load isn't eliminated. Put the load as the value into the HeapLocation.
1422 // This acts like GVN but with better aliasing analysis.
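    // For example (illustrative pseudo-code):
    //   x = obj.f;   // not eliminated; `x` becomes the heap value of `obj.f`
    //   y = obj.f;   // this later load can then be replaced by `x`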
Alex Light86fe9b82020-11-16 16:54:01 +00001423 Value old_value = record.value;
Vladimir Marko3224f382020-06-23 14:19:53 +01001424 record.value = Value::ForInstruction(instruction);
1425 KeepStoresIfAliasedToLocation(heap_values, idx);
Alex Light86fe9b82020-11-16 16:54:01 +00001426 KeepStores(old_value);
Vladimir Marko3224f382020-06-23 14:19:53 +01001427 } else if (record.value.NeedsLoopPhi()) {
1428 // We do not know yet if the value is known for all back edges. Record for future processing.
1429 loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1430 } else {
1431 // This load can be eliminated but we may need to construct non-loop Phis.
1432 if (record.value.NeedsNonLoopPhi()) {
1433 MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1434 record.value = Replacement(record.value);
1435 }
1436 HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1437 AddRemovedLoad(instruction, heap_value);
1438 TryRemovingNullCheck(instruction);
1439 }
1440}
1441
1442void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1443 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1444 DCHECK(!IsStore(value)) << value->DebugName();
1445 // value may already have a substitute.
1446 value = FindSubstitute(value);
1447 HBasicBlock* block = instruction->GetBlock();
1448 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1449 ValueRecord& record = heap_values[idx];
1450 DCHECK(!record.value.IsInstruction() ||
1451 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1452
1453 if (record.value.Equals(value)) {
1454 // Store into the heap location with the same value.
1455 // This store can be eliminated right away.
1456 block->RemoveInstruction(instruction);
1457 return;
1458 }
1459
1460 store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1461 loads_and_stores_.push_back({ instruction, idx });
1462
1463 // If the `record.stored_by` specified a store from this block, it shall be removed
1464 // at the end, except for a throwing ArraySet; it cannot be marked for keeping in
1465 // `kept_stores_` anymore after we update the `record.stored_by` below.
1466 DCHECK(!record.stored_by.IsInstruction() ||
1467 record.stored_by.GetInstruction()->GetBlock() != block ||
1468 record.stored_by.GetInstruction()->CanThrow() ||
1469 !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1470
1471 if (instruction->CanThrow()) {
1472 // Previous stores can become visible.
1473 HandleExit(instruction->GetBlock());
1474 // We cannot remove a possibly throwing store.
1475 // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1476 kept_stores_.SetBit(instruction->GetId());
1477 }
1478
1479 // Update the record.
1480 auto it = loads_requiring_loop_phi_.find(value);
1481 if (it != loads_requiring_loop_phi_.end()) {
1482 // Propagate the Phi placeholder to the record.
1483 record.value = it->second.value;
1484 DCHECK(record.value.NeedsLoopPhi());
1485 } else {
1486 record.value = Value::ForInstruction(value);
1487 }
1488 // Track the store in the value record. If the value is loaded or needed after
1489 // return/deoptimization later, this store isn't really redundant.
1490 record.stored_by = Value::ForInstruction(instruction);
1491
1492 // This store may kill values in other heap locations due to aliasing.
1493 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1494 if (i == idx ||
1495 heap_values[i].value.IsUnknown() ||
1496 CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1497 !heap_location_collector_.MayAlias(i, idx)) {
1498 continue;
1499 }
1500 // Kill heap locations that may alias and keep previous stores to these locations.
1501 KeepStores(heap_values[i].stored_by);
1502 heap_values[i].stored_by = Value::Unknown();
1503 heap_values[i].value = Value::Unknown();
1504 }
1505}
1506
1507void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1508 // Populate the heap_values array for this block.
1509 // TODO: try to reuse the heap_values array from one predecessor if possible.
1510 if (block->IsLoopHeader()) {
1511 PrepareLoopRecords(block);
1512 } else {
1513 MergePredecessorRecords(block);
1514 }
1515 // Visit instructions.
1516 HGraphVisitor::VisitBasicBlock(block);
1517}
1518
1519bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1520 const PhiPlaceholder* phi_placeholder,
1521 DataType::Type type,
1522 /*inout*/ArenaBitVector* phi_placeholders_to_materialize) {
1523 // Use local allocator to reduce peak memory usage.
1524 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1525 ArenaBitVector visited(&allocator,
1526 /*start_bits=*/ phi_placeholders_.size(),
1527 /*expandable=*/ false,
1528 kArenaAllocLSE);
1529 visited.ClearAllBits();
1530 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1531
1532 // Use depth first search to check if any non-Phi input is unknown.
1533 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1534 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1535 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1536 work_queue.push_back(phi_placeholder);
1537 while (!work_queue.empty()) {
1538 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1539 work_queue.pop_back();
1540 HBasicBlock* block = blocks[current_phi_placeholder->GetBlockId()];
1541 DCHECK_GE(block->GetPredecessors().size(), 2u);
1542 size_t idx = current_phi_placeholder->GetHeapLocation();
1543 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1544 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1545 if (value.NeedsPhi()) {
1546 // Visit the predecessor Phi placeholder if it's not visited yet.
1547 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1548 visited.SetBit(PhiPlaceholderIndex(value));
1549 work_queue.push_back(value.GetPhiPlaceholder());
1550 }
1551 } else if (!value.Equals(Value::Default())) {
1552 return false; // Report failure.
1553 }
1554 }
1555 if (block->IsLoopHeader()) {
1556 // For back-edges we need to check writes to all locations of the same array,
1557 // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`,
1558 // as they may actually refer to the same locations in different iterations.
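      // For example (illustrative pseudo-code):
      //   for (int i = 0; i < n; ++i) {
      //     a[i + 1] = 1;   // writes the location that `a[i]` reads in the
      //     use(a[i]);      // next iteration, despite LSA non-aliasing
      //   }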
1559 for (size_t i = 0; i != num_heap_locations; ++i) {
1560 if (i == idx ||
1561 heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1562 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1563 continue;
1564 }
1565 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1566 // Check if there were any writes to this location.
1567 // Note: We could simply process the values but due to the vector operation
1568 // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1569 // the value to change and not be equal to the default. To work around this and
1570 // allow replacing the non-vector load of loop-invariant default values
1571 // anyway, skip over paths that do not have any writes.
1572 ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1573 while (record.stored_by.NeedsLoopPhi() &&
1574 blocks[record.stored_by.GetPhiPlaceholder()->GetBlockId()]->IsLoopHeader()) {
1575 HLoopInformation* loop_info =
1576 blocks[record.stored_by.GetPhiPlaceholder()->GetBlockId()]->GetLoopInformation();
1577 record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1578 }
1579 Value value = ReplacementOrValue(record.value);
1580 if (value.NeedsPhi()) {
1581 // Visit the predecessor Phi placeholder if it's not visited yet.
1582 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1583 visited.SetBit(PhiPlaceholderIndex(value));
1584 work_queue.push_back(value.GetPhiPlaceholder());
1585 }
1586 } else if (!value.Equals(Value::Default())) {
1587 return false; // Report failure.
1588 }
1589 }
1590 }
1591 }
1592 }
1593
1594 // Record replacement and report success.
1595 HInstruction* replacement = GetDefaultValue(type);
1596 for (uint32_t phi_placeholder_index : visited.Indexes()) {
1597 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1598 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1599 }
1600 phi_placeholders_to_materialize->Subtract(&visited);
1601 return true;
1602}
1603
1604bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1605 const PhiPlaceholder* phi_placeholder,
1606 /*inout*/ArenaBitVector* phi_placeholders_to_materialize) {
1607 // Use local allocator to reduce peak memory usage.
1608 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1609 ArenaBitVector visited(&allocator,
1610 /*start_bits=*/ phi_placeholders_.size(),
1611 /*expandable=*/ false,
1612 kArenaAllocLSE);
1613 visited.ClearAllBits();
1614 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1615
1616 // Use depth first search to check if any non-Phi input is unknown.
1617 HInstruction* replacement = nullptr;
1618 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1619 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1620 work_queue.push_back(phi_placeholder);
1621 while (!work_queue.empty()) {
1622 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1623 work_queue.pop_back();
1624 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1625 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1626 size_t idx = current_phi_placeholder->GetHeapLocation();
1627 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1628 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1629 if (value.NeedsPhi()) {
1630 // Visit the predecessor Phi placeholder if it's not visited yet.
1631 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1632 visited.SetBit(PhiPlaceholderIndex(value));
1633 work_queue.push_back(value.GetPhiPlaceholder());
1634 }
1635 } else {
1636 if (!value.IsInstruction() ||
1637 (replacement != nullptr && replacement != value.GetInstruction())) {
1638 return false; // Report failure.
1639 }
1640 replacement = value.GetInstruction();
1641 }
1642 }
1643 }
1644
1645 // Record replacement and report success.
1646 DCHECK(replacement != nullptr);
1647 for (uint32_t phi_placeholder_index : visited.Indexes()) {
1648 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1649 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1650 }
1651 phi_placeholders_to_materialize->Subtract(&visited);
1652 return true;
1653}
1654
1655const LSEVisitor::PhiPlaceholder* LSEVisitor::FindLoopPhisToMaterialize(
1656 const PhiPlaceholder* phi_placeholder,
1657 /*inout*/ArenaBitVector* phi_placeholders_to_materialize,
1658 DataType::Type type,
1659 bool can_use_default_or_phi) {
1660 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1661
1662 // Use local allocator to reduce peak memory usage.
1663 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1664 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1665
1666 // Use depth first search to check if any non-Phi input is unknown.
1667 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1668 phi_placeholders_to_materialize->ClearAllBits();
1669 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
1670 work_queue.push_back(phi_placeholder);
1671 while (!work_queue.empty()) {
1672 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1673 work_queue.pop_back();
1674 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
1675 // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
1676 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
1677 Value::Default()));
1678 continue;
1679 }
1680 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1681 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1682 size_t idx = current_phi_placeholder->GetHeapLocation();
1683 if (current_block->IsLoopHeader()) {
1684 // If the index is defined inside the loop, it may reference different elements of the
1685 // array on each iteration. Since we do not track if all elements of an array are set
1686 // to the same value explicitly, the only known value in the pre-header can be the default
1687 // value from NewArray or a Phi placeholder depending on a default value from some outer
1688 // loop pre-header. This Phi placeholder can be replaced only by the default value.
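      // For example (illustrative pseudo-code):
      //   int[] a = new int[n];          // all elements start as the default 0
      //   for (int i = 0; i < n; ++i) {
      //     use(a[i]);                   // replaceable by 0 only if no element
      //   }                              // of `a` has been written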
1689 HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
1690 if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
1691 if (can_use_default_or_phi &&
1692 TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
1693 type,
1694 phi_placeholders_to_materialize)) {
1695 continue;
1696 } else {
1697 return current_phi_placeholder; // Report the loop Phi placeholder.
1698 }
1699 }
1700 // A similar situation arises with the index defined outside the loop if we cannot use
1701 // default values or Phis, i.e. for vector loads, as we can only replace the Phi
1702 // placeholder with a single instruction defined before the loop.
1703 if (!can_use_default_or_phi) {
1704 if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
1705 phi_placeholders_to_materialize)) {
1706 continue;
1707 } else {
1708 return current_phi_placeholder; // Report the loop Phi placeholder.
1709 }
1710 }
1711 }
1712 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1713 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1714 if (value.IsUnknown()) {
1715 // We cannot create a Phi for this loop Phi placeholder.
1716 return current_phi_placeholder; // Report the loop Phi placeholder.
1717 }
1718 if (value.NeedsLoopPhi()) {
1719 // Visit the predecessor Phi placeholder if it's not visited yet.
1720 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
1721 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
1722 work_queue.push_back(value.GetPhiPlaceholder());
1723 }
1724 }
1725 }
1726 }
1727
1728 // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
1729 return nullptr;
1730}
1731
1732bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
1733 DataType::Type type,
1734 Phase phase) {
1735 // Materialize all predecessor values that do not need a loop Phi and determine
1736 // if all inputs other than loop Phis are the same.
1737 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1738 Value other_value = Value::Invalid();
1739 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1740 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1741 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1742 DCHECK_GE(block->GetPredecessors().size(), 2u);
1743 size_t idx = phi_placeholder->GetHeapLocation();
1744 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1745 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1746 if (value.NeedsNonLoopPhi()) {
1747 DCHECK(phase == Phase::kLoadElimination);
1748 MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
1749 value = Replacement(value);
1750 }
1751 if (!value.NeedsLoopPhi()) {
1752 if (other_value.IsInvalid()) {
1753 // The first other value we found.
1754 other_value = value;
1755 } else if (!other_value.IsUnknown()) {
1756 // Check if the current `value` differs from the previous `other_value`.
1757 if (!value.Equals(other_value)) {
1758 other_value = Value::Unknown();
1759 }
1760 }
1761 }
1762 }
1763 }
1764
1765 DCHECK(other_value.IsValid());
1766 if (!other_value.IsUnknown()) {
1767 HInstruction* replacement =
1768 (other_value.IsDefault()) ? GetDefaultValue(type) : other_value.GetInstruction();
1769 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1770 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1771 }
1772 return true;
1773 }
1774
1775 // If we're materializing only a single Phi, try to match it with an existing Phi.
1776 // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
1777 // This also covers the case when after replacing a previous set of Phi placeholders,
1778 // we continue with a Phi placeholder that does not really need a loop Phi anymore.
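  // For example (illustrative): an existing `Phi(v, phi)` in this block that
  // merges the predecessor value `v` with itself over the back-edge matches the
  // placeholder and can be reused instead of creating a duplicate Phi.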
1779 if (phi_placeholder_indexes.size() == 1u) {
1780 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_indexes[0]];
1781 size_t idx = phi_placeholder->GetHeapLocation();
1782 HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder->GetBlockId()];
1783 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1784 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1785 HInstruction* phi = phi_it.Current();
1786 DCHECK_EQ(phi->InputCount(), predecessors.size());
1787 ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
1788 auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
1789 Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
1790 if (value.NeedsPhi()) {
1791 DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
1792 return lhs.GetInstruction() == phi;
1793 } else {
1794 DCHECK(value.IsDefault() || value.IsInstruction());
1795 return value.Equals(lhs.GetInstruction());
1796 }
1797 };
1798 if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
1799 phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
1800 return true;
1801 }
1802 }
1803 }
1804
1805 if (phase == Phase::kStoreElimination) {
Vladimir Markoed29dce2020-08-21 17:25:16 +01001806 // We're not creating Phis during the final store elimination phase.
Vladimir Marko3224f382020-06-23 14:19:53 +01001807 return false;
1808 }
1809
1810 // There are different inputs to the Phi chain. Create the Phis.
1811 ArenaAllocator* allocator = GetGraph()->GetAllocator();
1812 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1813 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1814 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1815 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
1816 new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
1817 }
1818 // Fill the Phi inputs.
1819 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1820 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1821 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1822 size_t idx = phi_placeholder->GetHeapLocation();
1823 HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
1824 for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
1825 HBasicBlock* predecessor = block->GetPredecessors()[i];
1826 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1827 HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
1828 DCHECK_NE(input->GetType(), DataType::Type::kVoid);
1829 phi->SetRawInputAt(i, input);
1830 }
1831 }
1832 // Add the Phis to their blocks.
1833 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1834 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1835 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1836 block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
1837 }
1838 if (type == DataType::Type::kReference) {
1839 ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
1840 ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
1841 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1842 phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
1843 }
1844 // Update reference type information. Pass invalid handles; these are not used for Phis.
1845 ReferenceTypePropagation rtp_fixup(GetGraph(),
1846 Handle<mirror::ClassLoader>(),
1847 Handle<mirror::DexCache>(),
1848 /* is_first_run= */ false);
1849 rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
1850 }
1851
1852 return true;
1853}
1854
1855bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
1856 DataType::Type type,
1857 Phase phase) {
1858 // Use local allocator to reduce peak memory usage.
1859 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1860
Vladimir Markoed29dce2020-08-21 17:25:16 +01001861 // We want to recognize when a subset of these loop Phis that does not need any other
Vladimir Marko3224f382020-06-23 14:19:53 +01001862 // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
1863 // i.e. that instruction can then be used instead of each Phi in the set. See for example
1864 // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
1865 // materialize these loop Phis from the smallest transitive closure.
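  // Illustrative sketch: if placeholder P2 depends on P1 but not vice versa,
  // the closure {P1} is materialized first; when all non-Phi inputs of {P1}
  // are a single value `v`, P1 is replaced by `v`, which may in turn let
  // {P2} collapse to a single input instead of a real Phi.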
1866
1867 // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
1868 // assign new indexes to the Phi placeholders, making the matrix dense.
1869 ScopedArenaVector<size_t> matrix_indexes(phi_placeholders_.size(),
1870 static_cast<size_t>(-1), // Invalid.
1871 allocator.Adapter(kArenaAllocLSE));
1872 ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
1873 size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
1874 phi_placeholder_indexes.reserve(num_phi_placeholders);
1875 for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
1876 matrix_indexes[marker_index] = phi_placeholder_indexes.size();
1877 phi_placeholder_indexes.push_back(marker_index);
1878 }
1879 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1880 ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
1881 dependencies.reserve(num_phi_placeholders);
1882 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1883 static constexpr bool kExpandable = false;
1884 dependencies.push_back(
1885 ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
1886 ArenaBitVector* current_dependencies = dependencies.back();
1887 current_dependencies->ClearAllBits();
1888 current_dependencies->SetBit(matrix_index); // Count the Phi placeholder as its own dependency.
1889 const PhiPlaceholder* current_phi_placeholder =
1890 &phi_placeholders_[phi_placeholder_indexes[matrix_index]];
1891 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1892 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1893 size_t idx = current_phi_placeholder->GetHeapLocation();
1894 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1895 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1896 if (pred_value.NeedsLoopPhi()) {
1897 size_t pred_value_index = PhiPlaceholderIndex(pred_value);
1898 DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
1899 DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
1900 current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
1901 }
1902 }
1903 }
1904
1905 // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
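  // This is the Boolean (transitive-closure) variant of Floyd-Warshall:
  //   dependencies[i][j] |= dependencies[i][k] && dependencies[k][j].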
1906 for (size_t k = 0; k != num_phi_placeholders; ++k) {
1907 for (size_t i = 0; i != num_phi_placeholders; ++i) {
1908 for (size_t j = 0; j != num_phi_placeholders; ++j) {
1909 if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
1910 dependencies[i]->SetBit(j);
1911 }
1912 }
1913 }
1914 }
1915
1916 // Count the number of transitive dependencies for each replaceable Phi placeholder.
1917 ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
1918 num_dependencies.reserve(num_phi_placeholders);
1919 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1920 num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
1921 }
1922
1923 // Pick a Phi placeholder with the smallest number of transitive dependencies and
1924 // materialize it and its dependencies. Repeat until we have materialized all.
1925 ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
1926 current_subset.reserve(num_phi_placeholders);
1927 size_t remaining_phi_placeholders = num_phi_placeholders;
1928 while (remaining_phi_placeholders != 0u) {
1929 auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
1930 DCHECK_LE(*it, remaining_phi_placeholders);
1931 size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
1932 ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
1933 size_t current_num_dependencies = num_dependencies[current_matrix_index];
1934 current_subset.clear();
1935 for (uint32_t matrix_index : current_dependencies->Indexes()) {
1936 current_subset.push_back(phi_placeholder_indexes[matrix_index]);
1937 }
1938 if (!MaterializeLoopPhis(current_subset, type, phase)) {
1939 DCHECK(phase == Phase::kStoreElimination);
1940 // This is the final store elimination phase and we shall not be able to eliminate any
1941 // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
1942 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1943 if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
1944 DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
1945 phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
1946 }
1947 }
1948 return false;
1949 }
1950 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1951 if (current_dependencies->IsBitSet(matrix_index)) {
1952 // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
1953 // so that they shall never be the minimum again.
1954 num_dependencies[matrix_index] = num_phi_placeholders;
1955 } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
1956 // Remove dependencies from other Phi placeholders.
1957 dependencies[matrix_index]->Subtract(current_dependencies);
1958 num_dependencies[matrix_index] -= current_num_dependencies;
1959 }
1960 }
1961 remaining_phi_placeholders -= current_num_dependencies;
1962 }
1963 return true;
1964}
1965
1966const LSEVisitor::PhiPlaceholder* LSEVisitor::TryToMaterializeLoopPhis(
1967 const PhiPlaceholder* phi_placeholder,
1968 HInstruction* load) {
1969 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1970
1971 // Use local allocator to reduce peak memory usage.
1972 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1973
1974 // Find Phi placeholders to materialize.
1975 ArenaBitVector phi_placeholders_to_materialize(
1976 &allocator, phi_placeholders_.size(), /*expandable=*/ false, kArenaAllocLSE);
1977 phi_placeholders_to_materialize.ClearAllBits();
1978 DataType::Type type = load->GetType();
1979 bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
1980 const PhiPlaceholder* loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
1981 phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
1982 if (loop_phi_with_unknown_input != nullptr) {
1983 return loop_phi_with_unknown_input; // Return failure.
1984 }
1985
1986 bool success =
1987 MaterializeLoopPhis(phi_placeholders_to_materialize, type, Phase::kLoadElimination);
1988 DCHECK(success);
1989
1990 // Report success.
1991 return nullptr;
1992}
1993
1994// Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
1995// find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
1996// propagate the load(s) as the new value(s) to successors; this may uncover new elimination
1997// opportunities. If we find no such load, we shall at least propagate an unknown value to some
1998// heap location that is needed by another loop Phi placeholder.
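// A sketch of one round (illustrative): if `x = obj.f` in a loop cannot become
// a Phi because some back-edge leaves `obj.f` unknown, `x` itself becomes the
// new heap value for `obj.f`, and later loads that merged the same placeholder
// may then be replaced by `x` on the next attempt.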
1999void LSEVisitor::ProcessLoopPhiWithUnknownInput(const PhiPlaceholder* loop_phi_with_unknown_input) {
2000 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
2001 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
Alex Light86fe9b82020-11-16 16:54:01 +00002002 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
2003 Value::MergedUnknown(loop_phi_with_unknown_input);
Vladimir Marko3224f382020-06-23 14:19:53 +01002004
2005 uint32_t block_id = loop_phi_with_unknown_input->GetBlockId();
2006 const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
2007 size_t reverse_post_order_index = 0;
2008 size_t reverse_post_order_size = reverse_post_order.size();
2009 size_t loads_and_stores_index = 0u;
2010 size_t loads_and_stores_size = loads_and_stores_.size();
2011
2012 // Skip blocks and instructions before the block containing the loop phi with unknown input.
2013 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2014 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
2015 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2016 while (loads_and_stores_index != loads_and_stores_size &&
2017 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
2018 ++loads_and_stores_index;
2019 }
2020 ++reverse_post_order_index;
2021 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
2022 }
2023
2024 // Use local allocator to reduce peak memory usage.
Vladimir Marko9e3fe992020-08-25 16:17:51 +01002025 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
Vladimir Marko3224f382020-06-23 14:19:53 +01002026 // Reuse one temporary vector for all remaining blocks.
2027 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
Vladimir Marko9e3fe992020-08-25 16:17:51 +01002028 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
Vladimir Marko3224f382020-06-23 14:19:53 +01002029
2030 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
2031 Value value;
2032 if (block->IsLoopHeader()) {
2033 if (block->GetLoopInformation()->IsIrreducible()) {
Alex Light86fe9b82020-11-16 16:54:01 +00002034 const PhiPlaceholder* placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
2035 value = Value::MergedUnknown(placeholder);
Vladimir Marko3224f382020-06-23 14:19:53 +01002036 } else {
2037 value = PrepareLoopValue(block, idx);
2038 }
2039 } else {
2040 value = MergePredecessorValues(block, idx);
2041 }
2042 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
2043 return value;
2044 };
2045
2046 // Process remaining blocks and instructions.
2047 bool found_unreplaceable_load = false;
2048 bool replaced_heap_value_with_unknown = false;
2049 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
2050 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
2051 if (block->IsExitBlock()) {
2052 continue;
2053 }
2054
2055 // We shall reconstruct only the heap values that we need for processing loads and stores.
2056 local_heap_values.clear();
2057 local_heap_values.resize(num_heap_locations, Value::Invalid());
2058
2059 for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
2060 HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
2061 size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
2062 if (load_or_store->GetBlock() != block) {
2063 break; // End of instructions from the current block.
2064 }
2065 bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
2066 DCHECK_EQ(is_store, IsStore(load_or_store));
2067 HInstruction* stored_value = nullptr;
2068 if (is_store) {
2069 auto it = store_records_.find(load_or_store);
2070 DCHECK(it != store_records_.end());
2071 stored_value = it->second.stored_value;
2072 }
2073 auto it = loads_requiring_loop_phi_.find(
2074 stored_value != nullptr ? stored_value : load_or_store);
2075 if (it == loads_requiring_loop_phi_.end()) {
2076 continue; // This load or store never needed a loop Phi.
2077 }
2078 ValueRecord& record = it->second;
Vladimir Marko9e3fe992020-08-25 16:17:51 +01002079 if (is_store) {
2080 // Process the store by updating `local_heap_values[idx]`. The last update shall
2081 // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
2082 // at the end of the block.
2083 Value replacement = ReplacementOrValue(record.value);
2084 if (replacement.NeedsLoopPhi()) {
2085 // No replacement yet, use the Phi placeholder from the load.
2086 DCHECK(record.value.NeedsLoopPhi());
2087 local_heap_values[idx] = record.value;
2088 } else {
2089 // If the load fetched a known value, use it, otherwise use the load.
2090 local_heap_values[idx] = Value::ForInstruction(
2091 replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
2092 }
2093 } else {
Vladimir Marko3224f382020-06-23 14:19:53 +01002094 // Process the load unless it has previously been marked unreplaceable.
2095 if (record.value.NeedsLoopPhi()) {
2096 if (local_heap_values[idx].IsInvalid()) {
2097 local_heap_values[idx] = get_initial_value(block, idx);
2098 }
2099 if (local_heap_values[idx].IsUnknown()) {
2100 // This load cannot be replaced. Keep stores that feed the Phi placeholder
2101 // (no aliasing store has occurred since then, otherwise the Phi placeholder would
2102 // not have been propagated as a value to this load) and store the load as the new heap value.
2103 found_unreplaceable_load = true;
2104 KeepStores(record.value);
2105 record.value = Value::Unknown();
2106 local_heap_values[idx] = Value::ForInstruction(load_or_store);
2107 } else if (local_heap_values[idx].NeedsLoopPhi()) {
2108 // The load may still be replaced with a Phi later.
2109 DCHECK(local_heap_values[idx].Equals(record.value));
2110 } else {
2111 // This load can be eliminated but we may need to construct non-loop Phis.
2112 if (local_heap_values[idx].NeedsNonLoopPhi()) {
2113 MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
2114 load_or_store->GetType());
2115 local_heap_values[idx] = Replacement(local_heap_values[idx]);
2116 }
2117 record.value = local_heap_values[idx];
2118 HInstruction* heap_value = local_heap_values[idx].GetInstruction();
2119 AddRemovedLoad(load_or_store, heap_value);
2120 TryRemovingNullCheck(load_or_store);
2121 }
2122 }
Vladimir Marko3224f382020-06-23 14:19:53 +01002123 }
2124 }
2125
2126 // All heap values that previously needed a loop Phi at the end of the block
2127 // need to be updated for processing successors.
2128 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
2129 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
2130 if (heap_values[idx].value.NeedsLoopPhi()) {
2131 if (local_heap_values[idx].IsValid()) {
2132 heap_values[idx].value = local_heap_values[idx];
2133 } else {
2134 heap_values[idx].value = get_initial_value(block, idx);
2135 }
2136 if (heap_values[idx].value.IsUnknown()) {
2137 replaced_heap_value_with_unknown = true;
2138 }
2139 }
2140 }
2141 }
2142 DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2143}
2144
2145void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2146 // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2147 // make the result of the processing depend on the order in which we process these loads.
2148 // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
2149 // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
2150 for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2151 auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2152 if (it == loads_requiring_loop_phi_.end()) {
2153 continue;
2154 }
2155 HInstruction* load = it->first;
2156 ValueRecord& record = it->second;
2157 while (record.value.NeedsLoopPhi() &&
2158 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2159 const PhiPlaceholder* loop_phi_with_unknown_input =
2160 TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2161 DCHECK_EQ(loop_phi_with_unknown_input != nullptr,
2162 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2163 if (loop_phi_with_unknown_input != nullptr) {
2164 ProcessLoopPhiWithUnknownInput(loop_phi_with_unknown_input);
2165 }
2166 }
2167 // The load could have been marked as unreplaceable (and stores marked for keeping)
2168 // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2169 DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2170 if (record.value.NeedsLoopPhi()) {
2171 record.value = Replacement(record.value);
2172 HInstruction* heap_value = record.value.GetInstruction();
2173 AddRemovedLoad(load, heap_value);
2174 TryRemovingNullCheck(load);
2175 }
2176 }
2177}
2178
void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
  ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
  size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
  work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
  for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
    work_queue.push_back(index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  std::optional<ArenaBitVector> not_kept_stores;
  if (stats_) {
    not_kept_stores.emplace(GetGraph()->GetAllocator(),
                            kept_stores_.GetBitSizeOf(),
                            /*expandable=*/ false,
                            ArenaAllocKind::kArenaAllocLSE);
  }
  while (!work_queue.empty()) {
    uint32_t cur_phi_idx = work_queue.back();
    const PhiPlaceholder* phi_placeholder = &phi_placeholders_[cur_phi_idx];
    // Only writes to partial escapes need to be specifically kept.
    bool is_partial_kept_merged_unknown =
        kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
        heap_location_collector_.GetHeapLocation(phi_placeholder->GetHeapLocation())
            ->GetReferenceInfo()
            ->IsPartialSingleton();
    work_queue.pop_back();
    size_t idx = phi_placeholder->GetHeapLocation();
    HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      // For loop back-edges we must also preserve all stores to locations that
      // may alias with the location `idx`.
      // TODO: Review whether we need to keep stores to aliased locations from the pre-header.
      // TODO: Add test cases around this.
      bool is_back_edge =
          block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
      size_t start = is_back_edge ? 0u : idx;
      size_t end = is_back_edge ? heap_values.size() : idx + 1u;
      for (size_t i = start; i != end; ++i) {
        Value stored_by = heap_values[i].stored_by;
        auto may_alias = [this, block, idx](size_t i) {
          DCHECK_NE(i, idx);
          DCHECK(block->IsLoopHeader());
          if (heap_location_collector_.MayAlias(i, idx)) {
            return true;
          }
          // For array locations with an index defined inside the loop, include
          // all other locations in the array, even those that LSA declares
          // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
          // refer to the same locations in different iterations. (LSA's
          // `ComputeMayAlias()` does not consider different loop iterations.)
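          // E.g. (illustrative):
          //   for (int i = 0; i < n - 1; ++i) {
          //     a[i + 1] = a[i];  // the location read as `a[i]` in iteration i + 1
          //   }                   // is the location written as `a[i + 1]` in iteration i.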
          HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
          HeapLocation* other_loc = heap_location_collector_.GetHeapLocation(i);
          if (heap_loc->IsArray() &&
              other_loc->IsArray() &&
              heap_loc->GetReferenceInfo() == other_loc->GetReferenceInfo() &&
              block->GetLoopInformation()->Contains(*heap_loc->GetIndex()->GetBlock())) {
            // If one location has its index defined inside the loop and the other outside
            // of it, LSA considers them aliasing and we take the early return above.
            DCHECK(block->GetLoopInformation()->Contains(*other_loc->GetIndex()->GetBlock()));
            return true;
          }
          return false;
        };
        if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) {
          if (stored_by.NeedsPhi()) {
            size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
            if (is_partial_kept_merged_unknown) {
              // Propagate the merged-unknown keep bit, since otherwise this might look
              // like a partial escape that we can remove.
              kept_merged_unknowns_.SetBit(phi_placeholder_index);
            }
            if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
              phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
              work_queue.push_back(phi_placeholder_index);
            }
          } else {
            DCHECK(IsStore(stored_by.GetInstruction()));
            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
            DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
            if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
              if (not_kept_stores) {
                not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
              }
            } else {
              kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
            }
          }
        }
      }
    }
  }
  if (not_kept_stores) {
    // `Subtract()` computes the set difference: a - b := (a & ~b).
    not_kept_stores->Subtract(&kept_stores_);
    auto num_removed = not_kept_stores->NumSetBits();
    MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
  }
}

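// Sketch of the rewind below: if the store recorded in `stored_by` has itself been
// eliminated, the heap location still holds the value it had before that store, so we
// walk back through the chain of eliminated stores to the last kept one before deciding
// what the actual old value of the location is.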
void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
  while (value_record->stored_by.IsInstruction() &&
         !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
    auto it = store_records_.find(value_record->stored_by.GetInstruction());
    DCHECK(it != store_records_.end());
    *value_record = it->second.old_value_record;
  }
  if (value_record->stored_by.NeedsPhi() &&
      !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
          PhiPlaceholderIndex(value_record->stored_by))) {
    // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
    // Phi placeholder to recalculate the actual value.
    value_record->value = value_record->stored_by;
  }
  value_record->value = ReplacementOrValue(value_record->value);
  if (value_record->value.NeedsNonLoopPhi()) {
    // Treat all Phi placeholders as requiring loop Phis at this point.
    // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
    value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
  }
}

void LSEVisitor::FindOldValueForPhiPlaceholder(const PhiPlaceholder* phi_placeholder,
                                               DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use a local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ phi_placeholders_.size(),
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  visited.ClearAllBits();

  // Find Phi placeholders to try and match against existing Phis or other replacement values.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, phi_placeholders_.size(), /*expandable=*/ false, kArenaAllocLSE);
  phi_placeholders_to_materialize.ClearAllBits();
  const PhiPlaceholder* loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/ true);
  if (loop_phi_with_unknown_input != nullptr) {
    // Mark both the searched Phi placeholder and the one with the unknown input
    // as unreplaceable.
    phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
    phi_placeholder_replacements_[PhiPlaceholderIndex(loop_phi_with_unknown_input)] =
        Value::Unknown();
    return;
  }

  bool success =
      MaterializeLoopPhis(phi_placeholders_to_materialize, type, Phase::kStoreElimination);
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
  DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
            !success);
}

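// Illustrative example (assumed Java-like source) of a store writing the old value:
//
//   int f = obj.field;  // f holds the current value of the location
//   ...                 // code that does not write obj.field
//   obj.field = f;      // writes the value already present; removable
//
// Such stores may become detectable only after other stores have been eliminated,
// so they are searched for in this late phase.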
void LSEVisitor::FindStoresWritingOldValues() {
  // The Phi placeholder replacements have so far been used for eliminating loads,
  // tracking values that would be stored if all stores were kept. As we want to
  // compare actual old values after removing unmarked stores, prune the Phi
  // placeholder replacements that can be fed by values we may not actually store.
  // Replacements marked as unknown can be kept as they are fed by some unknown
  // value and would end up as unknown again if we recalculated them.
  for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
    if (!phi_placeholder_replacements_[i].IsUnknown() &&
        !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
      phi_placeholder_replacements_[i] = Value::Invalid();
    }
  }

  // Update heap values at the end of blocks.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
      UpdateValueRecordForStoreElimination(&value_record);
    }
  }

  // Use a local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Mark the stores we want to eliminate in a separate bit vector.
  ArenaBitVector eliminated_stores(&allocator,
                                   /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
                                   /*expandable=*/ false,
                                   kArenaAllocLSE);
  eliminated_stores.ClearAllBits();

  for (auto& entry : store_records_) {
    HInstruction* store = entry.first;
    StoreRecord& store_record = entry.second;
    if (!kept_stores_.IsBitSet(store->GetId())) {
      continue;  // Ignore stores that are not kept.
    }
    UpdateValueRecordForStoreElimination(&store_record.old_value_record);
    if (store_record.old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record.stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
      store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
    }
    DCHECK(!store_record.old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record.stored_value);
    if (store_record.old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store->GetId());
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.

  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();

  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.

  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
    DCHECK_EQ(is_store, IsStore(record.load_or_store));
    if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
    }
  }

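  // Illustrative example (hypothetical Java source) of an allocation removed below:
  //
  //   Point p = new Point();  // singleton: never escapes this method
  //   p.x = 1;                // store removed above
  //   return p.x;             // load already replaced with the constant 1
  //
  // Once its loads and stores are gone, the HNewInstance has no non-environment
  // uses left and can be removed together with its constructor fences.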
  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

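// Illustrative summary (hypothetical Java source) of a successful run of the pass:
//
//   obj.field = 1;      // removed when proven redundant
//   int x = obj.field;  // replaced with the value 1; no memory access remains
//
// The pass bails out for debuggable graphs and for graphs with try/catch, as checked
// below.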
bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
    // The debugger may set heap values or trigger deoptimization of callers.
    // Try/catch support is not implemented yet.
    // Skip this optimization.
    return false;
  }
  // We need to be able to determine reachability. The information should initially
  // be empty, but clear it just to be safe.
  graph_->ClearReachabilityInformation();
  // Computing reachability takes O(blocks^3) time, but it lets us answer
  // reachability queries in O(1) afterwards.
  graph_->ComputeReachabilityInformation();
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, stats_, &allocator, /*for_elimination=*/true);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA; skip this optimization.
    return false;
  }

  LSEVisitor lse_visitor(graph_, heap_location_collector, stats_);
  lse_visitor.Run();
  return true;
}

}  // namespace art