/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "escape.h"
#include "load_store_analysis.h"
#include "optimizing/optimizing_compiler_stats.h"
#include "reference_type_propagation.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 * - In phase 1, we visit basic blocks in reverse post order and for each basic
 *   block, visit instructions sequentially, recording heap values and looking
 *   for loads and stores to eliminate without relying on loop Phis.
 * - In phase 2, we look for loads that can be replaced by creating loop Phis
 *   or using a loop-invariant value.
 * - In phase 3, we determine which stores are dead and can be eliminated, and
 *   based on that information we re-evaluate whether some kept stores are
 *   storing the same value as the value in the heap location; such stores are
 *   also marked for elimination.
 * - In phase 4, we commit the changes, replacing loads marked for elimination
 *   in previous processing and removing stores not marked for keeping. We also
 *   remove allocations that are no longer needed.
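 *
 * As a small illustrative sketch (hypothetical input, with names chosen for
 * the example only), consider:
 *
 *   x.f = 2;      // Store; kept only if the value may later be observed.
 *   int a = x.f;  // Load; the tracked heap value is 2, so the load is
 *                 // replaced with the constant 2 (recorded in phase 1,
 *                 // applied in phase 4).
 *   x.f = a;      // Stores the same value as the heap value; eliminated.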
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 * - For a loop header of an irreducible loop, all heap values are unknown.
 * - For a loop header of a normal loop, all values unknown at the end of the
 *   preheader are initialized to unknown, other heap values are set to Phi
 *   placeholders as we cannot determine yet whether these values are known on
 *   all back-edges. We use Phi placeholders also for array heap locations with
 *   an index defined inside the loop, but this helps only when the value
 *   remains zero from the array allocation throughout the loop.
 * - For other basic blocks, we merge incoming values from the end of all
 *   predecessors. If any incoming value is unknown, the start value for this
 *   block is also unknown. Otherwise, if all the incoming values are the same
 *   (including the case of a single predecessor), the incoming value is used.
 *   Otherwise, we use a Phi placeholder to indicate different incoming values.
 *   We record whether such a Phi placeholder depends on a loop Phi placeholder.
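 *
 * A minimal sketch of the merge rule above (hypothetical input):
 *
 *   if (cond) { x.f = 1; } else { x.f = 2; }
 *   int a = x.f;  // Incoming values 1 and 2 differ, so the heap value is a
 *                 // Phi placeholder; if the load is eliminated, an actual
 *                 // Phi(1, 2) is materialized for it.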
 *
 * For each instruction in the block
 * - If the instruction is a load from a heap location with a known value not
 *   dependent on a loop Phi placeholder, the load can be eliminated, either by
 *   using an existing instruction or by creating new Phi(s) instead. In order
 *   to maintain the validity of all heap locations during the optimization
 *   phase, we only record substitutes at this phase and the real elimination
 *   is delayed until the end of LSE. Loads that require a loop Phi placeholder
 *   replacement are recorded for processing later.
 * - If the instruction is a store, it updates the heap value for the heap
 *   location with the stored value and records the store itself so that we can
 *   mark it for keeping if the value becomes observable. Heap values are
 *   invalidated for heap locations that may alias with the store instruction's
 *   heap location and their recorded stores are marked for keeping as they are
 *   now potentially observable (see the sketch after this list). The store
 *   instruction can be eliminated unless the value stored is later needed,
 *   e.g. by a load from the same/aliased heap location, or the heap location
 *   persists at method return/deoptimization.
 * - A store that stores the same value as the heap value is eliminated.
 * - For newly instantiated instances, their heap values are initialized to
 *   language-defined default values.
 * - Finalizable objects are considered as persisting at method
 *   return/deoptimization.
 * - Some instructions such as invokes are treated as loading and invalidating
 *   all the heap values, depending on the instruction's side effects.
 * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *   partial overlap access among ArrayGet/ArraySet/VecLoad/VecStore is treated
 *   as aliasing, and no load/store is eliminated in such a case.
 * - Currently this LSE algorithm doesn't handle graphs with try-catch, due to
 *   the special block merging structure.
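 *
 * A sketch of the aliasing rule above (hypothetical input, assuming the
 * analysis cannot prove i != j):
 *
 *   a[i] = 1;      // Store recorded; the heap value for a[i] becomes 1.
 *   a[j] = 2;      // May alias a[i]: the value of a[i] is invalidated and
 *                  // the store `a[i] = 1` is marked for keeping.
 *   int v = a[i];  // Not eliminated; the tracked value is unknown.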
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *   O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *   O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with an
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable, or a new unknown value on a back-edge,
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
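 *
 * For illustration (hypothetical input), a load replaceable by a loop Phi:
 *
 *   x.f = 0;
 *   while (cond) {
 *     int v = x.f;  // Both inputs (0 from the pre-header, 1 from the
 *                   // back-edge) are known, so the load can be replaced
 *                   // by a loop Phi(0, 1).
 *     x.f = 1;
 *   }
 *
 * If the back-edge value were unknown, e.g. because `x` escaped to a call
 * inside the loop, the loop Phi placeholder would be marked unreplaceable.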
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *   O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *   O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *   O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *   O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *   O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *   O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit on the number of such iterations, the total time complexity is
 *   O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *   heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *   heap_locations^2 * edges .
 * Using the estimates
 *   edges <= blocks^2
 * and
 *   phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *   O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *   O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *   O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we
 * keep a record of the stores and Phi placeholders that become observable;
 * we now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value. Since ignoring non-observable
 * stores actually changes the old values in heap locations, we need to
 * recalculate Phi placeholder replacements, but we proceed similarly to the
 * previous phase. We look for the set of all Phis that feed the old value
 * replaced by the store (but ignoring whether they depend on a loop Phi) and,
 * if we find no unknown value, we try to match existing Phis (we do not create
 * new Phis anymore) or, if all other inputs are identical, i.e. the location
 * does not change in the loop, just use that input. If this succeeds and the
 * old value is identical to the value we're storing, such a store shall be
 * eliminated.
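 *
 * A sketch of this re-evaluation (hypothetical input):
 *
 *   x.f = 1;    // Establishes the old value 1.
 *   if (cond) {
 *     x.f = 2;  // Dead store: overwritten below before it can be observed.
 *     x.f = 1;  // Once the dead store is ignored, this stores the same
 *               // value as the old value and is also eliminated.
 *   }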
 *
 * The work is similar to phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *   O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *   heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *   O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *   O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of object, called a singleton, is instantiated in the method
 * and has a single name, i.e. no aliases. Singletons therefore have exclusive
 * heap locations. Singletons are helpful in narrowing down the life span of a
 * heap location such that they do not always need to participate in merging
 * heap values. Allocation of a singleton can be eliminated if that singleton
 * is not used and does not persist at method return/deoptimization.
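 *
 * A sketch of a removable singleton allocation (hypothetical input):
 *
 *   Point p = new Point();  // Singleton: the only name for this object.
 *   p.x = 4;                // Not observable; removed.
 *   return p.x;             // Replaced with the constant 4; with no uses
 *                           // left, the allocation itself is removed.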
 *
 * The time complexity of this phase is
 *   O(instructions + instruction_uses) .
 *
 * FIXME: The time complexity described above assumes that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time, but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art {

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id),
          heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    uint32_t GetBlockId() const {
      return block_id_;
    }

    size_t GetHeapLocation() const {
      return heap_location_;
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  class Value {
   public:
    enum class Type {
      kInvalid,
      kUnknown,
      kDefault,
      kInstruction,
      kNeedsNonLoopPhi,
      kNeedsLoopPhi,
    };

    static Value Invalid() {
      Value value;
      value.type_ = Type::kInvalid;
      value.instruction_ = nullptr;
      return value;
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static Value Unknown() {
      Value value;
      value.type_ = Type::kUnknown;
      value.instruction_ = nullptr;
      return value;
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static Value Default() {
      Value value;
      value.type_ = Type::kDefault;
      value.instruction_ = nullptr;
      return value;
    }

    static Value ForInstruction(HInstruction* instruction) {
      Value value;
      value.type_ = Type::kInstruction;
      value.instruction_ = instruction;
      return value;
    }

    static Value ForNonLoopPhiPlaceholder(const PhiPlaceholder* phi_placeholder) {
      Value value;
      value.type_ = Type::kNeedsNonLoopPhi;
      value.phi_placeholder_ = phi_placeholder;
      return value;
    }

    static Value ForLoopPhiPlaceholder(const PhiPlaceholder* phi_placeholder) {
      Value value;
      value.type_ = Type::kNeedsLoopPhi;
      value.phi_placeholder_ = phi_placeholder;
      return value;
    }

    static Value ForPhiPlaceholder(const PhiPlaceholder* phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    bool IsValid() const {
      return !IsInvalid();
    }

    bool IsInvalid() const {
      return type_ == Type::kInvalid;
    }

    bool IsUnknown() const {
      return type_ == Type::kUnknown;
    }

    bool IsDefault() const {
      return type_ == Type::kDefault;
    }

    bool IsInstruction() const {
      return type_ == Type::kInstruction;
    }

    bool NeedsNonLoopPhi() const {
      return type_ == Type::kNeedsNonLoopPhi;
    }

    bool NeedsLoopPhi() const {
      return type_ == Type::kNeedsLoopPhi;
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction());
      return instruction_;
    }

    const PhiPlaceholder* GetPhiPlaceholder() const {
      DCHECK(NeedsPhi());
      return phi_placeholder_;
    }

    bool Equals(Value other) const {
      // Only valid values can be compared.
      DCHECK(IsValid());
      DCHECK(other.IsValid());
      if (type_ != other.type_) {
        // Default values are equal to zero bit pattern instructions.
        return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
               (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
      } else {
        // Note: Two unknown values are considered different.
        return IsDefault() ||
               (IsInstruction() && GetInstruction() == other.GetInstruction()) ||
               (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder());
      }
    }

    bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

   private:
    Type type_;
    union {
      HInstruction* instruction_;
      const PhiPlaceholder* phi_placeholder_;
    };
  };

  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
  size_t PhiPlaceholderIndex(const PhiPlaceholder* phi_placeholder) const {
    return static_cast<size_t>(phi_placeholder - phi_placeholders_.data());
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  const PhiPlaceholder* GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    size_t phi_placeholders_begin = phi_placeholders_begin_for_block_[block_id];
    const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholders_begin + idx];
    DCHECK_EQ(phi_placeholder->GetBlockId(), block_id);
    DCHECK_EQ(phi_placeholder->GetHeapLocation(), idx);
    return phi_placeholder;
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi());
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK(!value.IsInstruction() ||
             FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  static ScopedArenaVector<PhiPlaceholder> CreatePhiPlaceholders(
      HGraph* graph,
      const HeapLocationCollector& heap_location_collector,
      ScopedArenaAllocator* allocator);
  static ScopedArenaVector<size_t> CreatePhiPlaceholdersBeginForBlock(
      HGraph* graph,
      const HeapLocationCollector& heap_location_collector,
      ScopedArenaAllocator* allocator);

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };
  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // Should never add a type conversion into a boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross an irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
        expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      DCHECK(!IsLoad(instruction));  // New Phi (may not be in the graph yet) or default value.
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }
  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
        "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
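    // For example (an illustrative sketch, not an exhaustive description):
    // an int-typed tracked value (type A) stored to a short field (type B)
    // and loaded back as short (type C) is equivalent to converting the
    // tracked value with a single conversion to short.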
    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
        load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations it is better to avoid creating
    // them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsUnknown()) {
      return;
    }
    if (value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with the heap location at `loc_index`
  // and the heap value of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put a value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  // `instruction` is being removed. Try to see if the null check on it
  // can be removed. This can happen if the same value is set in two branches
  // but not in dominators. Such as:
  //   int[] a = foo();
  //   if () {
  //     a[0] = 2;
  //   } else {
  //     a[0] = 2;
  //   }
  //   // a[0] can now be replaced with constant 2, and the null check on it can be removed.
  void TryRemovingNullCheck(HInstruction* instruction) {
    HInstruction* prev = instruction->GetPrevious();
    if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
      // Previous instruction is a null check for this instruction. Remove the null check.
      prev->ReplaceWith(prev->InputAt(0));
      prev->GetBlock()->RemoveInstruction(prev);
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];             <-- Different values in each lane.
    //  VecStore array[i ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data;  <-- We are here (partial overlap).
    //  VecLoad vx = array[i,i+1,i+2,i+3];              <-- Cannot be eliminated because the value
    //                                                      here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    // (1) May alias due to partial overlap of two vector data.
    //     e.g. a[i..i+3] and a[i+1,..,i+4].
    // (2) May alias due to two vector data that may completely overlap each other.
    //     e.g. a[i..i+3] and b[i..i+3].
    // (3) May alias but the exact relationship between the two locations is unknown.
    //     e.g. a[i..i+3] and b[j..j+3], where the values of a, b, i, j are all unknown.
    // This 'same value' optimization can apply only to case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(const PhiPlaceholder* phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination
  };

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      const PhiPlaceholder* phi_placeholder,
      DataType::Type type,
      /*inout*/ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      const PhiPlaceholder* phi_placeholder,
      /*inout*/ArenaBitVector* phi_placeholders_to_materialize);
  const PhiPlaceholder* FindLoopPhisToMaterialize(
      const PhiPlaceholder* phi_placeholder,
      /*out*/ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type,
                           Phase phase);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type,
                           Phase phase);
  const PhiPlaceholder* TryToMaterializeLoopPhis(const PhiPlaceholder* phi_placeholder,
                                                 HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(const PhiPlaceholder* loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(const PhiPlaceholder* phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    HInstruction* cls = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    HInstruction* cls = instruction->InputAt(0);
    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        if (!reference->IsNewInstance() || !reference->AsNewInstance()->IsFinalizable()) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::Unknown();
      }
    }
  }

  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!ref_info->IsSingletonAndRemovable()) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleExit(throw_instruction->GetBlock());
  }

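  // Handle invocation-like instructions conservatively: for non-singleton
  // heap locations, a callee that may read memory can observe previously
  // recorded stores (so they must be kept), and a callee that may write
  // memory can clobber the tracked values (so they become unknown). For
  // example (illustrative), after a call `foo(x)` with read/write side
  // effects, a pending store to `x.f` is kept and the tracked value of
  // `x.f` becomes unknown.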
  void HandleInvoke(HInstruction* instruction) {
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::Unknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::Unknown();
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[new_instance->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HInstruction* ref =
          heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance && offset >= mirror::kObjectHeaderSize) {
        // Instance fields except the header fields are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[new_array->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      HInstruction* ref = location->GetReferenceInfo()->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::Unknown();
      }
    }
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // Phi placeholders used for keeping track of values and stores for multiple predecessors.
  ScopedArenaVector<PhiPlaceholder> phi_placeholders_;

  // The start of the Phi placeholders in the `phi_placeholders_`
  // for each block with multiple predecessors.
  ScopedArenaVector<size_t> phi_placeholders_begin_for_block_;

  // One array of heap value records for each block.
  ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;

  // We record loads and stores for re-processing when we find a loop Phi placeholder
  // with unknown value from a predecessor, and also for removing stores that are
  // found to be dead, i.e. not marked in `kept_stores_` at the end.
  struct LoadStoreRecord {
    HInstruction* load_or_store;
    size_t heap_location_index;
  };
  ScopedArenaVector<LoadStoreRecord> loads_and_stores_;

  // We record the substitute instructions for loads that should be
  // eliminated but may be used by heap locations. They'll be removed
  // in the end. These are indexed by the load's id.
  ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;

  // Record stores to keep in a bit vector indexed by instruction ID.
  ArenaBitVector kept_stores_;
  // When we need to keep all stores that feed a Phi placeholder, we just record the
  // index of that placeholder for processing after graph traversal.
  ArenaBitVector phi_placeholders_to_search_for_kept_stores_;

  // Loads that would require a loop Phi to replace are recorded for processing
  // later as we do not have enough information from back-edges to determine if
  // a suitable Phi can be found or created when we visit these loads.
  ScopedArenaHashMap<HInstruction*, ValueRecord> loads_requiring_loop_phi_;

  // For stores, record the old value records that were replaced and the stored values.
  struct StoreRecord {
    ValueRecord old_value_record;
    HInstruction* stored_value;
  };
  ScopedArenaHashMap<HInstruction*, StoreRecord> store_records_;

  // Replacements for Phi placeholders.
  // The unknown heap value is used to mark Phi placeholders that cannot be replaced.
  ScopedArenaVector<Value> phi_placeholder_replacements_;

  ScopedArenaVector<HInstruction*> singleton_new_instances_;

  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};

ScopedArenaVector<LSEVisitor::PhiPlaceholder> LSEVisitor::CreatePhiPlaceholders(
    HGraph* graph,
    const HeapLocationCollector& heap_location_collector,
    ScopedArenaAllocator* allocator) {
  size_t num_phi_placeholders = 0u;
  size_t num_heap_locations = heap_location_collector.GetNumberOfHeapLocations();
  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (block->GetPredecessors().size() >= 2u) {
      num_phi_placeholders += num_heap_locations;
    }
  }
  ScopedArenaVector<PhiPlaceholder> phi_placeholders(allocator->Adapter(kArenaAllocLSE));
  phi_placeholders.reserve(num_phi_placeholders);
  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (block->GetPredecessors().size() >= 2u) {
      // Create Phi placeholders referencing the block by the block ID.
      DCHECK_LE(num_heap_locations, phi_placeholders.capacity() - phi_placeholders.size());
      uint32_t block_id = block->GetBlockId();
      for (size_t idx = 0; idx != num_heap_locations; ++idx) {
        phi_placeholders.push_back(PhiPlaceholder(block_id, idx));
      }
    }
  }
  return phi_placeholders;
}

ScopedArenaVector<size_t> LSEVisitor::CreatePhiPlaceholdersBeginForBlock(
    HGraph* graph,
    const HeapLocationCollector& heap_location_collector,
    ScopedArenaAllocator* allocator) {
  size_t num_phi_placeholders = 0u;
  size_t num_heap_locations = heap_location_collector.GetNumberOfHeapLocations();
  ScopedArenaVector<size_t> phi_placeholders_begin_for_block(graph->GetBlocks().size(),
                                                             allocator->Adapter(kArenaAllocLSE));
  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (block->GetPredecessors().size() >= 2u) {
      phi_placeholders_begin_for_block[block->GetBlockId()] = num_phi_placeholders;
      num_phi_placeholders += num_heap_locations;
    }
  }
  return phi_placeholders_begin_for_block;
}

LSEVisitor::LSEVisitor(HGraph* graph,
                       const HeapLocationCollector& heap_location_collector,
                       OptimizingCompilerStats* stats)
    : HGraphDelegateVisitor(graph, stats),
      heap_location_collector_(heap_location_collector),
      allocator_(graph->GetArenaStack()),
      phi_placeholders_(CreatePhiPlaceholders(graph, heap_location_collector, &allocator_)),
      phi_placeholders_begin_for_block_(
          CreatePhiPlaceholdersBeginForBlock(graph, heap_location_collector, &allocator_)),
      heap_values_for_(graph->GetBlocks().size(),
                       ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
                       allocator_.Adapter(kArenaAllocLSE)),
      loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
      // We may add new instructions (default values, Phis) but we're not adding loads
      // or stores, so we shall not need to resize the following vector and BitVector.
      substitute_instructions_for_loads_(graph->GetCurrentInstructionId(),
                                         nullptr,
                                         allocator_.Adapter(kArenaAllocLSE)),
      kept_stores_(&allocator_,
                   /*start_bits=*/ graph->GetCurrentInstructionId(),
                   /*expandable=*/ false,
                   kArenaAllocLSE),
      phi_placeholders_to_search_for_kept_stores_(&allocator_,
                                                  phi_placeholders_.size(),
                                                  /*expandable=*/ false,
                                                  kArenaAllocLSE),
      loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
      store_records_(allocator_.Adapter(kArenaAllocLSE)),
      phi_placeholder_replacements_(phi_placeholders_.size(),
                                    Value::Invalid(),
                                    allocator_.Adapter(kArenaAllocLSE)),
      singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
  // Clear bit vectors.
  phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
  kept_stores_.ClearAllBits();
}

LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
  // If the pre-header value is known (which implies that the reference dominates this
  // block), use a Phi placeholder for the value in the loop header. If all predecessors
  // are later found to have a known value, we can replace loads from this location,
  // either with the pre-header value or with a new Phi. For array locations, the index
  // may be defined inside the loop but the only known value in that case should be the
  // default value or a Phi placeholder that can be replaced only with the default value.
  HLoopInformation* loop_info = block->GetLoopInformation();
  uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
  Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
  if (pre_header_value.IsUnknown()) {
    return Value::Unknown();
  }
  if (kIsDebugBuild) {
    // Check that the reference indeed dominates this loop.
    HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
    HInstruction* ref = location->GetReferenceInfo()->GetReference();
    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block));
    // Check that the index, if defined inside the loop, tracks a default value
    // or a Phi placeholder requiring a loop Phi.
    HInstruction* index = location->GetIndex();
    if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()));
    }
  }
  const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
}

LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
  // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
  // if the value in the location escapes. This is not applicable to singletons that are
  // defined inside the loop as they shall be dead in the loop header.
  ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
  if (ref_info->IsSingleton() &&
      block->GetLoopInformation()->Contains(*ref_info->GetReference()->GetBlock())) {
    return Value::Unknown();
  }
  const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return Value::ForLoopPhiPlaceholder(phi_placeholder);
}

1083void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
1084 DCHECK(block->IsLoopHeader());
1085 int block_id = block->GetBlockId();
1086 HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
1087 ScopedArenaVector<ValueRecord>& pre_header_heap_values =
1088 heap_values_for_[pre_header->GetBlockId()];
1089 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1090 DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
1091 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1092 DCHECK(heap_values.empty());
1093
1094 // Don't eliminate loads in irreducible loops.
1095 if (block->GetLoopInformation()->IsIrreducible()) {
1096 heap_values.resize(num_heap_locations,
1097 { /*value=*/ Value::Unknown(), /*stored_by=*/ Value::Unknown() });
1098 // Also keep the stores before the loop header, including in blocks that were not visited yet.
1099 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1100 KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
1101 }
1102 return;
1103 }
1104
1105 // Fill `heap_values` based on values from pre-header.
1106 heap_values.reserve(num_heap_locations);
1107 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1108 heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
1109 }
1110}
1111
1112LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
1113 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1114 DCHECK(!predecessors.empty());
1115 Value merged_value =
1116 ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
1117 for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
1118 if (merged_value.IsUnknown()) {
1119 break;
1120 }
1121 Value pred_value =
1122 ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
1123 if (pred_value.IsUnknown()) {
1124 merged_value = Value::Unknown();
1125 } else if (!pred_value.Equals(merged_value)) {
1126 // There are conflicting known values. We may still be able to replace loads with a Phi.
1127 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1128 // Propagate the need for a new loop Phi from all predecessors.
1129 bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
1130 merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
1131 }
1132 }
1133 return merged_value;
1134}
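// Illustrative sketch of the merge above (hypothetical Java-like input):
//
//   if (cond) { obj.field = a; } else { obj.field = b; }
//   int x = obj.field;
//
// The two predecessors record the known values `a` and `b`; since they differ,
// the merged value becomes a Phi placeholder. The load of `x` can then be
// replaced by materializing Phi(a, b) instead of reading from memory.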
1135
1136void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
1137 if (block->IsExitBlock()) {
1138 // Exit block doesn't really merge values since the control flow ends in
1139 // its predecessors. Each predecessor needs to make sure stores are kept
1140 // if necessary.
1141 return;
1142 }
1143
1144 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1145 DCHECK(heap_values.empty());
1146 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1147 if (block->GetPredecessors().empty()) {
1148 DCHECK(block->IsEntryBlock());
1149 heap_values.resize(num_heap_locations,
1150 { /*value=*/ Value::Unknown(), /*stored_by=*/ Value::Unknown() });
1151 return;
1152 }
1153
1154 heap_values.reserve(num_heap_locations);
1155 for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
1156 Value merged_value = MergePredecessorValues(block, idx);
1157 if (kIsDebugBuild) {
1158 if (merged_value.NeedsPhi()) {
1159 uint32_t block_id = merged_value.GetPhiPlaceholder()->GetBlockId();
1160 CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
1161 } else if (merged_value.IsInstruction()) {
1162 CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
1163 }
1164 }
1165 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1166 Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
1167 for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
1168 uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
1169 Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
1170 if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
1171 !merged_stored_by.Equals(stored_by)) {
1172 // Use the Phi placeholder to track that we need to keep stores from all predecessors.
1173 const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
1174 merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
1175 break;
1176 }
1177 }
1178 heap_values.push_back({ merged_value, merged_stored_by });
1179 }
1180}
1181
1182static HInstruction* FindOrConstructNonLoopPhi(
1183 HBasicBlock* block,
1184 const ScopedArenaVector<HInstruction*>& phi_inputs,
1185 DataType::Type type) {
1186 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1187 HInstruction* phi = phi_it.Current();
1188 DCHECK_EQ(phi->InputCount(), phi_inputs.size());
1189 auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
1190 return lhs == rhs.GetInstruction();
1191 };
1192 if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
1193 return phi;
1194 }
1195 }
1196 ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
1197 HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
1198 for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
1199 DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
1200 phi->SetRawInputAt(i, phi_inputs[i]);
1201 }
1202 block->AddPhi(phi);
1203 if (type == DataType::Type::kReference) {
1204 // Update reference type information. Pass invalid handles; these are not used for Phis.
1205 ReferenceTypePropagation rtp_fixup(block->GetGraph(),
1206 Handle<mirror::ClassLoader>(),
1207 Handle<mirror::DexCache>(),
1208 /* is_first_run= */ false);
1209 rtp_fixup.Visit(phi);
1210 }
1211 return phi;
1212}
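// Note on the helper above: reusing an existing Phi with identical inputs
// avoids creating duplicate Phis and acts like GVN restricted to the Phis of
// a single block. The linear scan over `block->GetPhis()` is presumably cheap
// because blocks rarely accumulate many Phis, but that is a heuristic
// observation, not a guarantee stated in this file.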
1213
1214void LSEVisitor::MaterializeNonLoopPhis(const PhiPlaceholder* phi_placeholder,
1215 DataType::Type type) {
1216 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1217 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1218 size_t idx = phi_placeholder->GetHeapLocation();
1219
1220 // Use local allocator to reduce peak memory usage.
1221 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1222 // Reuse the same vector for collecting phi inputs.
1223 ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));
1224
1225 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1226 work_queue.push_back(phi_placeholder);
1227 while (!work_queue.empty()) {
1228 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1229 if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
1230 // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
1231 // that directly or indirectly depends on it, so it was already processed as part of the
1232 // other Phi placeholder's dependencies before this one got back to the top of the stack.
1233 work_queue.pop_back();
1234 continue;
1235 }
1236 uint32_t current_block_id = current_phi_placeholder->GetBlockId();
1237 HBasicBlock* current_block = blocks[current_block_id];
1238 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1239
1240 // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
1241 // And the only way for such a merged value to reach a different heap location is through
1242 // a load, at which point we materialize the Phi. Therefore all non-loop Phi placeholders
1243 // seen here are tied to one heap location.
1244 DCHECK(!current_block->IsLoopHeader());
1245 DCHECK_EQ(current_phi_placeholder->GetHeapLocation(), idx);
1246
1247 phi_inputs.clear();
1248 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1249 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1250 DCHECK(!pred_value.IsUnknown());
1251 if (pred_value.NeedsNonLoopPhi()) {
1252 // We need to process the Phi placeholder first.
1253 work_queue.push_back(pred_value.GetPhiPlaceholder());
1254 } else if (pred_value.IsDefault()) {
1255 phi_inputs.push_back(GetDefaultValue(type));
1256 } else {
1257 phi_inputs.push_back(pred_value.GetInstruction());
1258 }
1259 }
1260 if (phi_inputs.size() == current_block->GetPredecessors().size()) {
1261 // All inputs are available. Find or construct the Phi replacement.
1262 phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
1263 Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
1264 // Remove the block from the queue.
1265 DCHECK_EQ(current_phi_placeholder, work_queue.back());
1266 work_queue.pop_back();
1267 }
1268 }
1269}
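// Worked example of the work-queue loop above (block ids are hypothetical):
// suppose placeholder P1 (block B5) needs an input from placeholder P2
// (block B3). P1 is pushed first; when its B3 predecessor turns out to need
// P2, P2 is pushed on top and materialized first. When P1 returns to the top
// of the stack, all of its inputs are available and its Phi is constructed.
// Cycles are impossible here because non-loop Phis cannot depend on loop Phis.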
1270
1271void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
1272 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1273 uint32_t block_id = instruction->GetBlock()->GetBlockId();
1274 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
1275 ValueRecord& record = heap_values[idx];
1276 DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
1277 loads_and_stores_.push_back({ instruction, idx });
1278 if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
1279 !IsDefaultOrPhiAllowedForLoad(instruction)) {
1280 record.value = Value::Unknown();
1281 }
1282 if (record.value.IsDefault()) {
1283 KeepStores(record.stored_by);
1284 HInstruction* constant = GetDefaultValue(instruction->GetType());
1285 AddRemovedLoad(instruction, constant);
1286 record.value = Value::ForInstruction(constant);
1287 } else if (record.value.IsUnknown()) {
1288 // Load isn't eliminated. Put the load as the value into the HeapLocation.
1289 // This acts like GVN but with better aliasing analysis.
1290 record.value = Value::ForInstruction(instruction);
1291 KeepStoresIfAliasedToLocation(heap_values, idx);
1292 } else if (record.value.NeedsLoopPhi()) {
1293 // We do not know yet if the value is known for all back edges. Record for future processing.
1294 loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
1295 } else {
1296 // This load can be eliminated but we may need to construct non-loop Phis.
1297 if (record.value.NeedsNonLoopPhi()) {
1298 MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
1299 record.value = Replacement(record.value);
1300 }
1301 HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
1302 AddRemovedLoad(instruction, heap_value);
1303 TryRemovingNullCheck(instruction);
1304 }
1305}
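// Summary of the cases handled above:
//   - default value   -> keep feeding stores, replace the load with the default constant,
//   - unknown value   -> keep the load and track it as the new heap value (GVN-like),
//   - needs loop Phi  -> record the load for ProcessLoadsRequiringLoopPhis(),
//   - otherwise       -> materialize a non-loop Phi if needed and replace the load.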
1306
1307void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
1308 DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
1309 DCHECK(!IsStore(value)) << value->DebugName();
1310 // value may already have a substitute.
1311 value = FindSubstitute(value);
1312 HBasicBlock* block = instruction->GetBlock();
1313 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1314 ValueRecord& record = heap_values[idx];
1315 DCHECK(!record.value.IsInstruction() ||
1316 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());
1317
1318 if (record.value.Equals(value)) {
1319 // Store into the heap location with the same value.
1320 // This store can be eliminated right away.
1321 block->RemoveInstruction(instruction);
1322 return;
1323 }
1324
1325 store_records_.insert(std::make_pair(instruction, StoreRecord{record, value}));
1326 loads_and_stores_.push_back({ instruction, idx });
1327
1328 // If `record.stored_by` specified a store from this block, that store shall be removed
1329 // at the end, except for a throwing ArraySet; it can no longer be marked for keeping
1330 // in `kept_stores_` after we update the `record.stored_by` below.
1331 DCHECK(!record.stored_by.IsInstruction() ||
1332 record.stored_by.GetInstruction()->GetBlock() != block ||
1333 record.stored_by.GetInstruction()->CanThrow() ||
1334 !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));
1335
1336 if (instruction->CanThrow()) {
1337 // Previous stores can become visible.
1338 HandleExit(instruction->GetBlock());
1339 // We cannot remove a possibly throwing store.
1340 // After marking it as kept, it does not matter if we track it in `stored_by` or not.
1341 kept_stores_.SetBit(instruction->GetId());
1342 }
1343
1344 // Update the record.
1345 auto it = loads_requiring_loop_phi_.find(value);
1346 if (it != loads_requiring_loop_phi_.end()) {
1347 // Propagate the Phi placeholder to the record.
1348 record.value = it->second.value;
1349 DCHECK(record.value.NeedsLoopPhi());
1350 } else {
1351 record.value = Value::ForInstruction(value);
1352 }
1353 // Track the store in the value record. If the value is loaded or needed after
1354 // return/deoptimization later, this store isn't really redundant.
1355 record.stored_by = Value::ForInstruction(instruction);
1356
1357 // This store may kill values in other heap locations due to aliasing.
1358 for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
1359 if (i == idx ||
1360 heap_values[i].value.IsUnknown() ||
1361 CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
1362 !heap_location_collector_.MayAlias(i, idx)) {
1363 continue;
1364 }
1365 // Kill heap locations that may alias and keep previous stores to these locations.
1366 KeepStores(heap_values[i].stored_by);
1367 heap_values[i].stored_by = Value::Unknown();
1368 heap_values[i].value = Value::Unknown();
1369 }
1370}
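// Illustrative sketch of the aliasing kill above (hypothetical Java-like input):
//
//   a[i] = 1;
//   b[j] = 2;      // if `b` may alias `a`, the value tracked for a[i] is killed
//   use(a[i]);     // this load can no longer be eliminated
//
// The store to b[j] also forces earlier stores to the aliased location to be
// kept, since their values may now be observable through either reference.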
1371
1372void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
1373 // Populate the heap_values array for this block.
1374 // TODO: try to reuse the heap_values array from one predecessor if possible.
1375 if (block->IsLoopHeader()) {
1376 PrepareLoopRecords(block);
1377 } else {
1378 MergePredecessorRecords(block);
1379 }
1380 // Visit instructions.
1381 HGraphVisitor::VisitBasicBlock(block);
1382}
1383
1384bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
1385 const PhiPlaceholder* phi_placeholder,
1386 DataType::Type type,
1387 /*inout*/ArenaBitVector* phi_placeholders_to_materialize) {
1388 // Use local allocator to reduce peak memory usage.
1389 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1390 ArenaBitVector visited(&allocator,
1391 /*start_bits=*/ phi_placeholders_.size(),
1392 /*expandable=*/ false,
1393 kArenaAllocLSE);
1394 visited.ClearAllBits();
1395 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1396
1397 // Use depth first search to check that all non-Phi inputs are the default value.
1398 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1399 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1400 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1401 work_queue.push_back(phi_placeholder);
1402 while (!work_queue.empty()) {
1403 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1404 work_queue.pop_back();
1405 HBasicBlock* block = blocks[current_phi_placeholder->GetBlockId()];
1406 DCHECK_GE(block->GetPredecessors().size(), 2u);
1407 size_t idx = current_phi_placeholder->GetHeapLocation();
1408 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1409 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1410 if (value.NeedsPhi()) {
1411 // Visit the predecessor Phi placeholder if it's not visited yet.
1412 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1413 visited.SetBit(PhiPlaceholderIndex(value));
1414 work_queue.push_back(value.GetPhiPlaceholder());
1415 }
1416 } else if (!value.Equals(Value::Default())) {
1417 return false; // Report failure.
1418 }
1419 }
1420 if (block->IsLoopHeader()) {
1421 // For back-edges we need to check all heap locations in the same array,
1422 // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`,
1423 // as they may actually refer to the same location on different iterations.
1424 for (size_t i = 0; i != num_heap_locations; ++i) {
1425 if (i == idx ||
1426 heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
1427 heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
1428 continue;
1429 }
1430 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1431 // Check if there were any writes to this location.
1432 // Note: We could simply process the values but due to the vector operation
1433 // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
1434 // the value to change and not be equal to default. To work around this and
1435 // allow replacing the non-vector load of loop-invariant default values
1436 // anyway, skip over paths that do not have any writes.
1437 ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
1438 while (record.stored_by.NeedsLoopPhi() &&
1439 blocks[record.stored_by.GetPhiPlaceholder()->GetBlockId()]->IsLoopHeader()) {
1440 HLoopInformation* loop_info =
1441 blocks[record.stored_by.GetPhiPlaceholder()->GetBlockId()]->GetLoopInformation();
1442 record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
1443 }
1444 Value value = ReplacementOrValue(record.value);
1445 if (value.NeedsPhi()) {
1446 // Visit the predecessor Phi placeholder if it's not visited yet.
1447 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1448 visited.SetBit(PhiPlaceholderIndex(value));
1449 work_queue.push_back(value.GetPhiPlaceholder());
1450 }
1451 } else if (!value.Equals(Value::Default())) {
1452 return false; // Report failure.
1453 }
1454 }
1455 }
1456 }
1457 }
1458
1459 // Record replacement and report success.
1460 HInstruction* replacement = GetDefaultValue(type);
1461 for (uint32_t phi_placeholder_index : visited.Indexes()) {
1462 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1463 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1464 }
1465 phi_placeholders_to_materialize->Subtract(&visited);
1466 return true;
1467}
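// Illustrative sketch (hypothetical Java-like input): in
//
//   int[] a = new int[n];
//   for (int i = 0; i < n; ++i) {
//     sum += a[i];      // no writes to `a` anywhere in the loop
//   }
//
// every `a[i]` load tracks a loop Phi placeholder whose only non-Phi input is
// the default value 0 from the NewArray, so the check above succeeds and the
// loads are replaced by the constant 0.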
1468
1469bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
1470 const PhiPlaceholder* phi_placeholder,
1471 /*inout*/ArenaBitVector* phi_placeholders_to_materialize) {
1472 // Use local allocator to reduce peak memory usage.
1473 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1474 ArenaBitVector visited(&allocator,
1475 /*start_bits=*/ phi_placeholders_.size(),
1476 /*expandable=*/ false,
1477 kArenaAllocLSE);
1478 visited.ClearAllBits();
1479 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1480
1481 // Use depth first search to check that all non-Phi inputs are the same single instruction.
1482 HInstruction* replacement = nullptr;
1483 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1484 visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
1485 work_queue.push_back(phi_placeholder);
1486 while (!work_queue.empty()) {
1487 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1488 work_queue.pop_back();
1489 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1490 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1491 size_t idx = current_phi_placeholder->GetHeapLocation();
1492 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1493 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1494 if (value.NeedsPhi()) {
1495 // Visit the predecessor Phi placeholder if it's not visited yet.
1496 if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
1497 visited.SetBit(PhiPlaceholderIndex(value));
1498 work_queue.push_back(value.GetPhiPlaceholder());
1499 }
1500 } else {
1501 if (!value.IsInstruction() ||
1502 (replacement != nullptr && replacement != value.GetInstruction())) {
1503 return false; // Report failure.
1504 }
1505 replacement = value.GetInstruction();
1506 }
1507 }
1508 }
1509
1510 // Record replacement and report success.
1511 DCHECK(replacement != nullptr);
1512 for (uint32_t phi_placeholder_index : visited.Indexes()) {
1513 DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
1514 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1515 }
1516 phi_placeholders_to_materialize->Subtract(&visited);
1517 return true;
1518}
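// Illustrative sketch (hypothetical input): a vector load in a loop cannot be
// replaced by a default value or a Phi, but if the only non-Phi input reaching
// the placeholder chain is a single instruction `v` defined before the loop,
// all placeholders in the visited set collapse to `v`.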
1519
1520const LSEVisitor::PhiPlaceholder* LSEVisitor::FindLoopPhisToMaterialize(
1521 const PhiPlaceholder* phi_placeholder,
1522 /*inout*/ArenaBitVector* phi_placeholders_to_materialize,
1523 DataType::Type type,
1524 bool can_use_default_or_phi) {
1525 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1526
1527 // Use local allocator to reduce peak memory usage.
1528 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1529 ScopedArenaVector<const PhiPlaceholder*> work_queue(allocator.Adapter(kArenaAllocLSE));
1530
1531 // Use depth first search to check if any non-Phi input is unknown.
1532 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1533 phi_placeholders_to_materialize->ClearAllBits();
1534 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
1535 work_queue.push_back(phi_placeholder);
1536 while (!work_queue.empty()) {
1537 const PhiPlaceholder* current_phi_placeholder = work_queue.back();
1538 work_queue.pop_back();
1539 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
1540 // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
1541 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
1542 Value::Default()));
1543 continue;
1544 }
1545 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1546 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1547 size_t idx = current_phi_placeholder->GetHeapLocation();
1548 if (current_block->IsLoopHeader()) {
1549 // If the index is defined inside the loop, it may reference different elements of the
1550 // array on each iteration. Since we do not track if all elements of an array are set
1551 // to the same value explicitly, the only known value in pre-header can be the default
1552 // value from NewArray or a Phi placeholder depending on a default value from some outer
1553 // loop pre-header. This Phi placeholder can be replaced only by the default value.
1554 HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
1555 if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
1556 if (can_use_default_or_phi &&
1557 TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
1558 type,
1559 phi_placeholders_to_materialize)) {
1560 continue;
1561 } else {
1562 return current_phi_placeholder; // Report the loop Phi placeholder.
1563 }
1564 }
1565 // A similar situation arises with the index defined outside the loop if we cannot use
1566 // default values or Phis, i.e. for vector loads, as we can only replace the Phi
1567 // placeholder with a single instruction defined before the loop.
1568 if (!can_use_default_or_phi) {
1569 if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
1570 phi_placeholders_to_materialize)) {
1571 continue;
1572 } else {
1573 return current_phi_placeholder; // Report the loop Phi placeholder.
1574 }
1575 }
1576 }
1577 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1578 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1579 if (value.IsUnknown()) {
1580 // We cannot create a Phi for this loop Phi placeholder.
1581 return current_phi_placeholder; // Report the loop Phi placeholder.
1582 }
1583 if (value.NeedsLoopPhi()) {
1584 // Visit the predecessor Phi placeholder if it's not visited yet.
1585 if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
1586 phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
1587 work_queue.push_back(value.GetPhiPlaceholder());
1588 }
1589 }
1590 }
1591 }
1592
1593 // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
1594 return nullptr;
1595}
1596
1597bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
1598 DataType::Type type,
1599 Phase phase) {
1600 // Materialize non-loop Phis needed by predecessor values and determine if all
1601 // inputs other than loop Phis are the same.
1602 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1603 Value other_value = Value::Invalid();
1604 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1605 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1606 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1607 DCHECK_GE(block->GetPredecessors().size(), 2u);
1608 size_t idx = phi_placeholder->GetHeapLocation();
1609 for (HBasicBlock* predecessor : block->GetPredecessors()) {
1610 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1611 if (value.NeedsNonLoopPhi()) {
1612 DCHECK(phase == Phase::kLoadElimination);
1613 MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
1614 value = Replacement(value);
1615 }
1616 if (!value.NeedsLoopPhi()) {
1617 if (other_value.IsInvalid()) {
1618 // The first other value we found.
1619 other_value = value;
1620 } else if (!other_value.IsUnknown()) {
1621 // Check if the current `value` differs from the previous `other_value`.
1622 if (!value.Equals(other_value)) {
1623 other_value = Value::Unknown();
1624 }
1625 }
1626 }
1627 }
1628 }
1629
1630 DCHECK(other_value.IsValid());
1631 if (!other_value.IsUnknown()) {
1632 HInstruction* replacement =
1633 (other_value.IsDefault()) ? GetDefaultValue(type) : other_value.GetInstruction();
1634 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1635 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
1636 }
1637 return true;
1638 }
1639
1640 // If we're materializing only a single Phi, try to match it with an existing Phi.
1641 // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
1642 // This also covers the case when after replacing a previous set of Phi placeholders,
1643 // we continue with a Phi placeholder that does not really need a loop Phi anymore.
1644 if (phi_placeholder_indexes.size() == 1u) {
1645 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_indexes[0]];
1646 size_t idx = phi_placeholder->GetHeapLocation();
1647 HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder->GetBlockId()];
1648 ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
1649 for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
1650 HInstruction* phi = phi_it.Current();
1651 DCHECK_EQ(phi->InputCount(), predecessors.size());
1652 ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
1653 auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
1654 Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
1655 if (value.NeedsPhi()) {
1656 DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
1657 return lhs.GetInstruction() == phi;
1658 } else {
1659 DCHECK(value.IsDefault() || value.IsInstruction());
1660 return value.Equals(lhs.GetInstruction());
1661 }
1662 };
1663 if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
1664 phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
1665 return true;
1666 }
1667 }
1668 }
1669
1670 if (phase == Phase::kStoreElimination) {
1671 // We're not creating Phis during the final store elimination phase.
1672 return false;
1673 }
1674
1675 // There are different inputs to the Phi chain. Create the Phis.
1676 ArenaAllocator* allocator = GetGraph()->GetAllocator();
1677 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1678 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1679 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1680 phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
1681 new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
1682 }
1683 // Fill the Phi inputs.
1684 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1685 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1686 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1687 size_t idx = phi_placeholder->GetHeapLocation();
1688 HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
1689 for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
1690 HBasicBlock* predecessor = block->GetPredecessors()[i];
1691 Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1692 HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
1693 DCHECK_NE(input->GetType(), DataType::Type::kVoid);
1694 phi->SetRawInputAt(i, input);
1695 }
1696 }
1697 // Add the Phis to their blocks.
1698 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1699 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholder_index];
1700 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
1701 block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
1702 }
1703 if (type == DataType::Type::kReference) {
1704 ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
1705 ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
1706 for (size_t phi_placeholder_index : phi_placeholder_indexes) {
1707 phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
1708 }
1709 // Update reference type information. Pass invalid handles, these are not used for Phis.
1710 ReferenceTypePropagation rtp_fixup(GetGraph(),
1711 Handle<mirror::ClassLoader>(),
1712 Handle<mirror::DexCache>(),
1713 /* is_first_run= */ false);
1714 rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
1715 }
1716
1717 return true;
1718}
1719
1720bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
1721 DataType::Type type,
1722 Phase phase) {
1723 // Use local allocator to reduce peak memory usage.
1724 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1725
1726 // We want to recognize when a subset of these loop Phis that depends on no loop Phis
1727 // outside the subset, i.e. a transitive closure, has only one other instruction as an
1728 // input, so that this instruction can be used instead of each Phi in the set. See for
1729 // example Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
1730 // materialize these loop Phis from the smallest transitive closure.
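// Illustrative sketch (placeholder names are hypothetical): if P1 and P2
// depend only on each other and on one instruction `x`, while P3 depends on
// P1 and on `y`, then {P1, P2} is the smallest transitive closure. Both P1
// and P2 are replaced by `x` without creating a Phi; P3 is processed
// afterwards with inputs `x` and `y` and may then need a real Phi.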
1731
1732 // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
1733 // assign new indexes to the Phi placeholders, making the matrix dense.
1734 ScopedArenaVector<size_t> matrix_indexes(phi_placeholders_.size(),
1735 static_cast<size_t>(-1), // Invalid.
1736 allocator.Adapter(kArenaAllocLSE));
1737 ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
1738 size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
1739 phi_placeholder_indexes.reserve(num_phi_placeholders);
1740 for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
1741 matrix_indexes[marker_index] = phi_placeholder_indexes.size();
1742 phi_placeholder_indexes.push_back(marker_index);
1743 }
1744 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
1745 ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
1746 dependencies.reserve(num_phi_placeholders);
1747 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1748 static constexpr bool kExpandable = false;
1749 dependencies.push_back(
1750 ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
1751 ArenaBitVector* current_dependencies = dependencies.back();
1752 current_dependencies->ClearAllBits();
1753 current_dependencies->SetBit(matrix_index); // Count the Phi placeholder as its own dependency.
1754 const PhiPlaceholder* current_phi_placeholder =
1755 &phi_placeholders_[phi_placeholder_indexes[matrix_index]];
1756 HBasicBlock* current_block = blocks[current_phi_placeholder->GetBlockId()];
1757 DCHECK_GE(current_block->GetPredecessors().size(), 2u);
1758 size_t idx = current_phi_placeholder->GetHeapLocation();
1759 for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
1760 Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
1761 if (pred_value.NeedsLoopPhi()) {
1762 size_t pred_value_index = PhiPlaceholderIndex(pred_value);
1763 DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
1764 DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
1765 current_dependencies->SetBit(matrix_indexes[PhiPlaceholderIndex(pred_value)]);
1766 }
1767 }
1768 }
1769
1770 // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
1771 for (size_t k = 0; k != num_phi_placeholders; ++k) {
1772 for (size_t i = 0; i != num_phi_placeholders; ++i) {
1773 for (size_t j = 0; j != num_phi_placeholders; ++j) {
1774 if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
1775 dependencies[i]->SetBit(j);
1776 }
1777 }
1778 }
1779 }
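// Note: this is the classic Floyd-Warshall closure specialized to a boolean
// reachability matrix, O(n^3) in the number of placeholders to materialize.
// The sets involved are expected to be small, as they are limited to the Phi
// placeholders of a single materialization request, so the cubic cost should
// be acceptable here, though this file does not state a bound.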
1780
1781 // Count the number of transitive dependencies for each replaceable Phi placeholder.
1782 ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
1783 num_dependencies.reserve(num_phi_placeholders);
1784 for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1785 num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
1786 }
1787
1788 // Pick a Phi placeholder with the smallest number of transitive dependencies and
1789 // materialize it and its dependencies. Repeat until we have materialized all.
1790 ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
1791 current_subset.reserve(num_phi_placeholders);
1792 size_t remaining_phi_placeholders = num_phi_placeholders;
1793 while (remaining_phi_placeholders != 0u) {
1794 auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
1795 DCHECK_LE(*it, remaining_phi_placeholders);
1796 size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
1797 ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
1798 size_t current_num_dependencies = num_dependencies[current_matrix_index];
1799 current_subset.clear();
1800 for (uint32_t matrix_index : current_dependencies->Indexes()) {
1801 current_subset.push_back(phi_placeholder_indexes[matrix_index]);
1802 }
1803 if (!MaterializeLoopPhis(current_subset, type, phase)) {
1804 DCHECK(phase == Phase::kStoreElimination);
1805 // This is the final store elimination phase and we shall not be able to eliminate any
1806 // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
1807 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1808 if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
1809 DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
1810 phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] = Value::Unknown();
1811 }
1812 }
1813 return false;
1814 }
1815 for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
1816 if (current_dependencies->IsBitSet(matrix_index)) {
1817 // Mark all Phi placeholders in the materialized subset as done by raising their
1818 // `num_dependencies[.]` to the total count, so that they shall never be the minimum again.
1819 num_dependencies[matrix_index] = num_phi_placeholders;
1820 } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
1821 // Remove dependencies from other Phi placeholders.
1822 dependencies[matrix_index]->Subtract(current_dependencies);
1823 num_dependencies[matrix_index] -= current_num_dependencies;
1824 }
1825 }
1826 remaining_phi_placeholders -= current_num_dependencies;
1827 }
1828 return true;
1829}
1830
1831const LSEVisitor::PhiPlaceholder* LSEVisitor::TryToMaterializeLoopPhis(
1832 const PhiPlaceholder* phi_placeholder,
1833 HInstruction* load) {
1834 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
1835
1836 // Use local allocator to reduce peak memory usage.
1837 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1838
1839 // Find Phi placeholders to materialize.
1840 ArenaBitVector phi_placeholders_to_materialize(
1841 &allocator, phi_placeholders_.size(), /*expandable=*/ false, kArenaAllocLSE);
1842 phi_placeholders_to_materialize.ClearAllBits();
1843 DataType::Type type = load->GetType();
1844 bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
1845 const PhiPlaceholder* loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
1846 phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
1847 if (loop_phi_with_unknown_input != nullptr) {
1848 return loop_phi_with_unknown_input; // Return failure.
1849 }
1850
1851 bool success =
1852 MaterializeLoopPhis(phi_placeholders_to_materialize, type, Phase::kLoadElimination);
1853 DCHECK(success);
1854
1855 // Report success.
1856 return nullptr;
1857}
1858
1859// Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
1860// find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
1861// propagate the load(s) as the new value(s) to successors; this may uncover new elimination
1862// opportunities. If we find no such load, we shall at least propagate an unknown value to some
1863// heap location that is needed by another loop Phi placeholder.
1864void LSEVisitor::ProcessLoopPhiWithUnknownInput(const PhiPlaceholder* loop_phi_with_unknown_input) {
1865 size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
1866 DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
1867 phi_placeholder_replacements_[loop_phi_with_unknown_input_index] = Value::Unknown();
1868
1869 uint32_t block_id = loop_phi_with_unknown_input->GetBlockId();
1870 const ArenaVector<HBasicBlock*> reverse_post_order = GetGraph()->GetReversePostOrder();
1871 size_t reverse_post_order_index = 0;
1872 size_t reverse_post_order_size = reverse_post_order.size();
1873 size_t loads_and_stores_index = 0u;
1874 size_t loads_and_stores_size = loads_and_stores_.size();
1875
1876 // Skip blocks and instructions before the block containing the loop phi with unknown input.
1877 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
1878 while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
1879 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
1880 while (loads_and_stores_index != loads_and_stores_size &&
1881 loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
1882 ++loads_and_stores_index;
1883 }
1884 ++reverse_post_order_index;
1885 DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
1886 }
1887
1888 // Use local allocator to reduce peak memory usage.
1889 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
1890 // Reuse one temporary vector for all remaining blocks.
1891 size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
1892 ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));
1893
1894 auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
1895 Value value;
1896 if (block->IsLoopHeader()) {
1897 if (block->GetLoopInformation()->IsIrreducible()) {
1898 value = Value::Unknown();
1899 } else {
1900 value = PrepareLoopValue(block, idx);
1901 }
1902 } else {
1903 value = MergePredecessorValues(block, idx);
1904 }
1905 DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
1906 return value;
1907 };
1908
1909 // Process remaining blocks and instructions.
1910 bool found_unreplaceable_load = false;
1911 bool replaced_heap_value_with_unknown = false;
1912 for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
1913 HBasicBlock* block = reverse_post_order[reverse_post_order_index];
1914 if (block->IsExitBlock()) {
1915 continue;
1916 }
1917
1918 // We shall reconstruct only the heap values that we need for processing loads and stores.
1919 local_heap_values.clear();
1920 local_heap_values.resize(num_heap_locations, Value::Invalid());
1921
1922 for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
1923 HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
1924 size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
1925 if (load_or_store->GetBlock() != block) {
1926 break; // End of instructions from the current block.
1927 }
1928 bool is_store = load_or_store->GetSideEffects().DoesAnyWrite();
1929 DCHECK_EQ(is_store, IsStore(load_or_store));
1930 HInstruction* stored_value = nullptr;
1931 if (is_store) {
1932 auto it = store_records_.find(load_or_store);
1933 DCHECK(it != store_records_.end());
1934 stored_value = it->second.stored_value;
1935 }
1936 auto it = loads_requiring_loop_phi_.find(
1937 stored_value != nullptr ? stored_value : load_or_store);
1938 if (it == loads_requiring_loop_phi_.end()) {
1939 continue; // This load or store never needed a loop Phi.
1940 }
1941 ValueRecord& record = it->second;
1942 if (is_store) {
1943 // Process the store by updating `local_heap_values[idx]`. The last update shall
1944 // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
1945 // at the end of the block.
1946 Value replacement = ReplacementOrValue(record.value);
1947 if (replacement.NeedsLoopPhi()) {
1948 // No replacement yet, use the Phi placeholder from the load.
1949 DCHECK(record.value.NeedsLoopPhi());
1950 local_heap_values[idx] = record.value;
1951 } else {
1952 // If the load fetched a known value, use it, otherwise use the load.
1953 local_heap_values[idx] = Value::ForInstruction(
1954 replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
1955 }
1956 } else {
1957 // Process the load unless it has previously been marked unreplaceable.
1958 if (record.value.NeedsLoopPhi()) {
1959 if (local_heap_values[idx].IsInvalid()) {
1960 local_heap_values[idx] = get_initial_value(block, idx);
1961 }
1962 if (local_heap_values[idx].IsUnknown()) {
1963 // This load cannot be replaced. Keep stores that feed the Phi placeholder
1964 // (no aliasing since then, otherwise the Phi placeholder would not have been
1965 // propagated as a value to this load) and store the load as the new heap value.
1966 found_unreplaceable_load = true;
1967 KeepStores(record.value);
1968 record.value = Value::Unknown();
1969 local_heap_values[idx] = Value::ForInstruction(load_or_store);
1970 } else if (local_heap_values[idx].NeedsLoopPhi()) {
1971 // The load may still be replaced with a Phi later.
1972 DCHECK(local_heap_values[idx].Equals(record.value));
1973 } else {
1974 // This load can be eliminated but we may need to construct non-loop Phis.
1975 if (local_heap_values[idx].NeedsNonLoopPhi()) {
1976 MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
1977 load_or_store->GetType());
1978 local_heap_values[idx] = Replacement(local_heap_values[idx]);
1979 }
1980 record.value = local_heap_values[idx];
1981 HInstruction* heap_value = local_heap_values[idx].GetInstruction();
1982 AddRemovedLoad(load_or_store, heap_value);
1983 TryRemovingNullCheck(load_or_store);
1984 }
1985 }
1986 }
1987 }
1988
1989 // All heap values that previously needed a loop Phi at the end of the block
1990 // need to be updated for processing successors.
1991 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
1992 for (size_t idx = 0; idx != num_heap_locations; ++idx) {
1993 if (heap_values[idx].value.NeedsLoopPhi()) {
1994 if (local_heap_values[idx].IsValid()) {
1995 heap_values[idx].value = local_heap_values[idx];
1996 } else {
1997 heap_values[idx].value = get_initial_value(block, idx);
1998 }
1999 if (heap_values[idx].value.IsUnknown()) {
2000 replaced_heap_value_with_unknown = true;
2001 }
2002 }
2003 }
2004 }
2005 DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
2006}
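// Note on progress: the DCHECK above documents the invariant that each call
// either marks at least one load unreplaceable (keeping its feeding stores) or
// turns at least one previously Phi-dependent heap value into unknown. Either
// way the amount of remaining loop-Phi work strictly decreases, which is what
// allows the retry loop in ProcessLoadsRequiringLoopPhis() to terminate.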
2007
2008void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
2009 // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
2010 // make the result of the processing depend on the order in which we process these loads.
2011 // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of
2012 // `loads_requiring_loop_phi_`, which is indexed by non-deterministic pointers.
2013 for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
2014 auto it = loads_requiring_loop_phi_.find(load_store_record.load_or_store);
2015 if (it == loads_requiring_loop_phi_.end()) {
2016 continue;
2017 }
2018 HInstruction* load = it->first;
2019 ValueRecord& record = it->second;
2020 while (record.value.NeedsLoopPhi() &&
2021 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid()) {
2022 const PhiPlaceholder* loop_phi_with_unknown_input =
2023 TryToMaterializeLoopPhis(record.value.GetPhiPlaceholder(), load);
2024 DCHECK_EQ(loop_phi_with_unknown_input != nullptr,
2025 phi_placeholder_replacements_[PhiPlaceholderIndex(record.value)].IsInvalid());
2026 if (loop_phi_with_unknown_input != nullptr) {
2027 ProcessLoopPhiWithUnknownInput(loop_phi_with_unknown_input);
2028 }
2029 }
2030 // The load could have been marked as unreplaceable (and stores marked for keeping)
2031 // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
2032 DCHECK(record.value.IsUnknown() || record.value.IsInstruction() || record.value.NeedsLoopPhi());
2033 if (record.value.NeedsLoopPhi()) {
2034 record.value = Replacement(record.value);
2035 HInstruction* heap_value = record.value.GetInstruction();
2036 AddRemovedLoad(load, heap_value);
2037 TryRemovingNullCheck(load);
2038 }
2039 }
2040}
2041
2042void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
2043 ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
2044 size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
2045 work_queue.reserve(((start_size * 3u) + 1u) / 2u); // Reserve 1.5x start size, rounded up.
2046 for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
2047 work_queue.push_back(index);
2048 }
2049 const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
2050 while (!work_queue.empty()) {
2051 const PhiPlaceholder* phi_placeholder = &phi_placeholders_[work_queue.back()];
2052 work_queue.pop_back();
2053 size_t idx = phi_placeholder->GetHeapLocation();
2054 HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
2055 for (HBasicBlock* predecessor : block->GetPredecessors()) {
2056 ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
2057 // For loop back-edges we must also preserve all stores to locations that
2058 // may alias with the location `idx`.
2059 // TODO: Review whether we need to keep stores to aliased locations from the pre-header.
2060 // TODO: Add test cases around this.
2061 bool is_back_edge =
2062 block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
2063 size_t start = is_back_edge ? 0u : idx;
2064 size_t end = is_back_edge ? heap_values.size() : idx + 1u;
2065 for (size_t i = start; i != end; ++i) {
2066 Value stored_by = heap_values[i].stored_by;
2067 auto may_alias = [this, block, idx](size_t i) {
2068 DCHECK_NE(i, idx);
2069 DCHECK(block->IsLoopHeader());
2070 if (heap_location_collector_.MayAlias(i, idx)) {
2071 return true;
2072 }
2073 // For array locations with index defined inside the loop, include
2074 // all other locations in the array, even those that LSA declares
2075 // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
2076 // refer to the same locations for different iterations. (LSA's
2077 // `ComputeMayAlias()` does not consider different loop iterations.)
2078 HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
2079 HeapLocation* other_loc = heap_location_collector_.GetHeapLocation(i);
2080 if (heap_loc->IsArray() &&
2081 other_loc->IsArray() &&
2082 heap_loc->GetReferenceInfo() == other_loc->GetReferenceInfo() &&
2083 block->GetLoopInformation()->Contains(*heap_loc->GetIndex()->GetBlock())) {
2084 // If one location has index defined inside and the other index defined outside
2085 // of the loop, LSA considers them aliasing and we take an early return above.
2086 DCHECK(block->GetLoopInformation()->Contains(*other_loc->GetIndex()->GetBlock()));
2087 return true;
2088 }
2089 return false;
2090 };
2091 if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) {
2092 if (stored_by.NeedsPhi()) {
2093 size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
2094 if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
2095 phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
2096 work_queue.push_back(phi_placeholder_index);
2097 }
2098 } else {
2099 DCHECK(IsStore(stored_by.GetInstruction()));
2100 kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
2101 }
2102 }
2103 }
2104 }
2105 }
2106}
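// Illustrative sketch of the search above (hypothetical input): if a load was
// eliminated using a Phi placeholder, the stores feeding that placeholder on
// all paths become observable and must be kept. For a loop back-edge this
// also pulls in stores to possibly-aliasing locations, e.g. a kept value for
// `a[i]` keeps a back-edge store to `a[i + 1]`, since the two may refer to
// the same element on different iterations.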
2107
2108void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
2109 while (value_record->stored_by.IsInstruction() &&
2110 !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
2111 auto it = store_records_.find(value_record->stored_by.GetInstruction());
2112 DCHECK(it != store_records_.end());
2113 *value_record = it->second.old_value_record;
2114 }
2115 if (value_record->stored_by.NeedsPhi() &&
2116 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
2117 PhiPlaceholderIndex(value_record->stored_by))) {
2118 // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
2119 // Phi placeholder to recalculate the actual value.
2120 value_record->value = value_record->stored_by;
2121 }
2122 value_record->value = ReplacementOrValue(value_record->value);
2123 if (value_record->value.NeedsNonLoopPhi()) {
2124 // Treat all Phi placeholders as requiring loop Phis at this point.
2125 // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
2126 value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
2127 }
2128}
2129
2130void LSEVisitor::FindOldValueForPhiPlaceholder(const PhiPlaceholder* phi_placeholder,
2131 DataType::Type type) {
2132 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
2133
2134 // Use local allocator to reduce peak memory usage.
2135 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2136 ArenaBitVector visited(&allocator,
2137 /*start_bits=*/ phi_placeholders_.size(),
2138 /*expandable=*/ false,
2139 kArenaAllocLSE);
2140 visited.ClearAllBits();
2141
2142 // Find Phi placeholders to try and match against existing Phis or other replacement values.
2143 ArenaBitVector phi_placeholders_to_materialize(
2144 &allocator, phi_placeholders_.size(), /*expandable=*/ false, kArenaAllocLSE);
2145 phi_placeholders_to_materialize.ClearAllBits();
2146 const PhiPlaceholder* loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
2147 phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/ true);
2148 if (loop_phi_with_unknown_input != nullptr) {
2149 // Mark the unreplaceable placeholder as well as the input Phi placeholder as unreplaceable.
2150 phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::Unknown();
2151 phi_placeholder_replacements_[PhiPlaceholderIndex(loop_phi_with_unknown_input)] =
2152 Value::Unknown();
2153 return;
2154 }
2155
2156 bool success =
2157 MaterializeLoopPhis(phi_placeholders_to_materialize, type, Phase::kStoreElimination);
2158 DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
2159 DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
2160 !success);
2161}
2162
2163void LSEVisitor::FindStoresWritingOldValues() {
2164 // The Phi placeholder replacements have so far been used for eliminating loads,
2165 // tracking values that would be stored if all stores were kept. As we want to
2166 // compare actual old values after removing unmarked stores, prune the Phi
2167 // placeholder replacements that can be fed by values we may not actually store.
2168 // Replacements marked as unknown can be kept as they are fed by some unknown
2169 // value and would end up as unknown again if we recalculated them.
2170 for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
2171 if (!phi_placeholder_replacements_[i].IsUnknown() &&
2172 !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
2173 phi_placeholder_replacements_[i] = Value::Invalid();
2174 }
2175 }
2176
2177 // Update heap values at end of blocks.
2178 for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
2179 for (ValueRecord& value_record : heap_values_for_[block->GetBlockId()]) {
2180 UpdateValueRecordForStoreElimination(&value_record);
2181 }
2182 }
2183
2184 // Use local allocator to reduce peak memory usage.
2185 ScopedArenaAllocator allocator(allocator_.GetArenaStack());
2186 // Mark the stores we want to eliminate in a separate bit vector.
2187 ArenaBitVector eliminated_stores(&allocator,
2188 /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
2189 /*expandable=*/ false,
2190 kArenaAllocLSE);
2191 eliminated_stores.ClearAllBits();
2192
  for (auto& entry : store_records_) {
    HInstruction* store = entry.first;
    StoreRecord& store_record = entry.second;
    if (!kept_stores_.IsBitSet(store->GetId())) {
      continue;  // Ignore stores that are not kept.
    }
    UpdateValueRecordForStoreElimination(&store_record.old_value_record);
    if (store_record.old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record.stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record.old_value_record.value.GetPhiPlaceholder(), type);
      store_record.old_value_record.value = ReplacementOrValue(store_record.old_value_record.value);
    }
    DCHECK(!store_record.old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record.stored_value);
    if (store_record.old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store->GetId());
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
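  // For example (illustration): in
  //   obj.f = 0;
  //   while (...) { x = obj.f; obj.f = x + 1; }
  // the load of `obj.f` inside the loop can be replaced by a loop Phi
  // equivalent to phi(0, phi + 1), found or created in this phase.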
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.

  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();

  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.

  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
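    // For example (illustration): if load `l1` was substituted by load `l2`
    // and `l2` in turn by instruction `i`, then `i` was already recorded as
    // the substitute for `l1`, so a substitute is always its own substitute.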
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    bool is_store = record.load_or_store->GetSideEffects().DoesAnyWrite();
    DCHECK_EQ(is_store, IsStore(record.load_or_store));
    if (is_store && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
    }
  }

  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
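  // For instance (illustration): a non-escaping `new Point()` whose field
  // stores were all removed above keeps no non-environment uses; its
  // constructor fences are removable since no other thread can observe the
  // object, and the allocation itself is then removed as unused.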
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_,
                    MethodCompilationStat::kConstructorFenceRemovedLSE,
                    removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
    }
  }
}

bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
    // The debugger may set heap values or trigger deoptimization of callers.
    // Try/catch support is not implemented yet.
    // Skip this optimization.
    return false;
  }
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, &allocator);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA; skip this optimization.
    return false;
  }

  LSEVisitor lse_visitor(graph_, heap_location_collector, stats_);
  lse_visitor.Run();
  return true;
}

}  // namespace art