Diffstat (limited to 'compiler/optimizing')
28 files changed, 1241 insertions, 409 deletions
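The bulk of the change is in bounds_check_elimination.cc: when a monotonic induction-variable range cannot be narrowed inside the loop, the pass now inserts HDeoptimize guards (and, when the loop body may not be entered at all, a loop-entry test) in blocks placed before the loop. A source-level sketch of that transformation, mirroring the comment added in the diff below (the name array_length_in_loop_body_if_needed is the illustrative one used there):

    // Before (as described in the comment added to MonotonicValueRange::NarrowWithDeoptimization):
    for (int i = start; i < end; i++) {
      array[i - 1] = array[i] + array[i + 1];
    }

    // After adding deoptimization guards ahead of the loop:
    int array_length_in_loop_body_if_needed;
    if (start >= end) {
      array_length_in_loop_body_if_needed = 0;   // loop body is never entered
    } else {
      if (start < 1) deoptimize();               // lowest index used is start - 1
      if (array == null) deoptimize();           // null check hoisted out of the loop
      int array_length = array.length;
      if (end > array_length - 1) deoptimize();  // highest index used is (end - 1) + 1
      array_length_in_loop_body_if_needed = array_length;
    }
    for (int i = start; i < end; i++) {
      // No more null checks or bounds checks; uses of array.length in the body
      // are rewritten to array_length_in_loop_body_if_needed.
      array[i - 1] = array[i] + array[i + 1];
    }

If either guard fires at run time, execution deoptimizes to the interpreter, which then throws the same ArrayIndexOutOfBoundsException the loop body itself would have thrown, so the guards never trigger for loops that would not have thrown.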
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index b2b54965b5..97b3725da1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -126,11 +126,14 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { - // Null check on the NewArray should have been eliminated by instruction - // simplifier already. - if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return instruction->InputAt(0)->AsNewArray(); + static HInstruction* FromArrayLengthToArray(HInstruction* instruction) { + DCHECK(instruction->IsArrayLength() || instruction->IsNewArray()); + if (instruction->IsArrayLength()) { + HInstruction* input = instruction->InputAt(0); + if (input->IsNullCheck()) { + input = input->AsNullCheck()->InputAt(0); + } + return input; } return instruction; } @@ -146,8 +149,9 @@ class ValueBound : public ValueObject { // Some bounds are created with HNewArray* as the instruction instead // of HArrayLength*. They are treated the same. - instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); - instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + // HArrayLength with the same array input are considered equal also. + instruction1 = FromArrayLengthToArray(instruction1); + instruction2 = FromArrayLengthToArray(instruction2); return instruction1 == instruction2; } @@ -271,7 +275,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // Loop header of loop_info. Exiting loop is normal. return false; } - const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); for (size_t i = 0; i < successors.Size(); i++) { if (!loop_info->Contains(*successors.Get(i))) { // One of the successors exits the loop. @@ -293,8 +297,14 @@ class ArrayAccessInsideLoopFinder : public ValueObject { void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { - HBasicBlock* block = it_loop.Current(); + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + HBasicBlock* block = it_loop.Current(); + DCHECK(block == induction_variable_->GetBlock()); + // Skip loop header. Since narrowed value range of a MonotonicValueRange only + // applies to the loop body (after the test at the end of the loop header). + it_loop.Advance(); + for (; !it_loop.Done(); it_loop.Advance()) { + block = it_loop.Current(); DCHECK(block->IsInLoop()); if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure @@ -308,30 +318,35 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // that the loop will loop through the full monotonic value range from // initial_ to end_. So adding deoptimization might be too aggressive and can // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. Otherwise, the loop induction variable is going to cover the full - // monotonic value range from initial_ to end_, and deoptimizations are added - // iff the loop will throw AIOOBE. + // AIOOBE. 
found_array_length_ = nullptr; return; } for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr; instruction = instruction->GetNext()) { - if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + if (!instruction->IsBoundsCheck()) { continue; } - HInstruction* index = instruction->InputAt(1); - if (!index->IsBoundsCheck()) { + + HInstruction* length_value = instruction->InputAt(1); + if (length_value->IsIntConstant()) { + // TODO: may optimize for constant case. continue; } - HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); - if (array_length == nullptr) { - DCHECK(index->InputAt(1)->IsIntConstant()); - // TODO: may optimize for constant case. + if (length_value->IsPhi()) { + // When adding deoptimizations in outer loops, we might create + // a phi for the array length, and update all uses of the + // length in the loop to that phi. Therefore, inner loops having + // bounds checks on the same array will use that phi. + // TODO: handle these cases. continue; } + DCHECK(length_value->IsArrayLength()); + HArrayLength* array_length = length_value->AsArrayLength(); + HInstruction* array = array_length->InputAt(0); if (array->IsNullCheck()) { array = array->AsNullCheck()->InputAt(0); @@ -347,7 +362,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { continue; } - index = index->AsBoundsCheck()->InputAt(0); + HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); HInstruction* left = index; int32_t right = 0; if (left == induction_variable_ || @@ -375,7 +390,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // The instruction that corresponds to a MonotonicValueRange. HInstruction* induction_variable_; - // The array length of the array that's accessed inside the loop. + // The array length of the array that's accessed inside the loop body. HArrayLength* found_array_length_; // The lowest and highest constant offsets relative to induction variable @@ -411,6 +426,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } + bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { if (other_range == nullptr) { @@ -495,13 +512,30 @@ class MonotonicValueRange : public ValueRange { ValueBound GetBound() const { return bound_; } void SetEnd(HInstruction* end) { end_ = end; } void SetInclusive(bool inclusive) { inclusive_ = inclusive; } - HBasicBlock* GetLoopHead() const { + HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + HBasicBlock* GetLoopHeaderSuccesorInLoop() { + HBasicBlock* header = GetLoopHeader(); + HInstruction* instruction = header->GetLastInstruction(); + DCHECK(instruction->IsIf()); + HIf* h_if = instruction->AsIf(); + HLoopInformation* loop_info = header->GetLoopInformation(); + bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); + bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); + + // Just in case it's some strange loop structure. + if (true_successor_in_loop && false_successor_in_loop) { + return nullptr; + } + DCHECK(true_successor_in_loop || false_successor_in_loop); + return false_successor_in_loop ? 
h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); + } + // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -593,12 +627,114 @@ class MonotonicValueRange : public ValueRange { } } + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + // For example, this loop: + // + // for (int i = start; i < end; i++) { + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // will be transformed to: + // + // int array_length_in_loop_body_if_needed; + // if (start >= end) { + // array_length_in_loop_body_if_needed = 0; + // } else { + // if (start < 1) deoptimize(); + // if (array == null) deoptimize(); + // array_length = array.length; + // if (end > array_length - 1) deoptimize; + // array_length_in_loop_body_if_needed = array_length; + // } + // for (int i = start; i < end; i++) { + // // No more null check and bounds check. + // // array.length value is replaced with array_length_in_loop_body_if_needed + // // in the loop body. + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // We basically first go through the loop body and find those array accesses whose + // index is at a constant offset from the induction variable ('i' in the above example), + // and update offset_low and offset_high along the way. We then add the following + // deoptimizations in the loop pre-header (suppose end is not inclusive). + // if (start < -offset_low) deoptimize(); + // if (end >= array.length - offset_high) deoptimize(); + // It might be necessary to first hoist array.length (and the null check on it) out of + // the loop with another deoptimization. + // + // In order not to trigger deoptimization unnecessarily, we want to make a strong + // guarantee that no deoptimization is triggered if the loop body itself doesn't + // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop + // body must throw AIOOBE). + // This is achieved by the following: + // 1) We only process loops that iterate through the full monotonic range from + // initial_ to end_. We do the following checks to make sure that's the case: + // a) The loop doesn't have early exit (via break, return, etc.) + // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. + // 2) We only collect array accesses of blocks in the loop body that dominate + // all loop back edges, these array accesses are guaranteed to happen + // at each loop iteration. + // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses + // when the induction variable is at initial_ and end_ must be in a legal range. + // Since the added deoptimizations are basically checking the induction variable + // at initial_ and end_ values, no deoptimization will be triggered either. + // + // A special case is the loop body isn't entered at all. In that case, we may still + // add deoptimization due to the analysis described above. In order not to trigger + // deoptimization, we do a test between initial_ and end_ first and skip over + // the added deoptimization. + ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // In order not to trigger deoptimization unnecessarily, we want to + // make sure the loop iterates through the full range from initial_ to + // end_ so that boundaries are covered by the loop. An increment of 2, + // for example, may skip end_. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. 
+ return this; + } + + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't add a check in loop pre-header if the value isn't available there. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + // Returns true if adding a (constant >= value) check for deoptimization // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, - int32_t constant, - bool* is_proven) { + bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + // See if we can prove the relationship first. if (value->IsIntConstant()) { if (value->AsIntConstant()->GetValue() >= constant) { @@ -615,22 +751,118 @@ class MonotonicValueRange : public ValueRange { return true; } + // Try to filter out cases that the loop entry test will never be true. + bool LoopEntryTestUseful() { + if (initial_->IsIntConstant() && end_->IsIntConstant()) { + int32_t initial_val = initial_->AsIntConstant()->GetValue(); + int32_t end_val = end_->AsIntConstant()->GetValue(); + if (increment_ == 1) { + if (inclusive_) { + return initial_val > end_val; + } else { + return initial_val >= end_val; + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + return initial_val < end_val; + } else { + return initial_val <= end_val; + } + } + } + return true; + } + + // Returns the block for adding deoptimization. + HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + // Deoptimization is only added when both initial_ and end_ are defined + // before the loop. + DCHECK(initial_->GetBlock()->Dominates(pre_header)); + DCHECK(end_->GetBlock()->Dominates(pre_header)); + + // If it can be proven the loop body is definitely entered (unless exception + // is thrown in the loop header for which triggering deoptimization is fine), + // there is no need for tranforming the loop. In that case, deoptimization + // will just be added in the loop pre-header. 
+ if (!LoopEntryTestUseful()) { + return pre_header; + } + + HGraph* graph = header->GetGraph(); + graph->TransformLoopHeaderForBCE(header); + HBasicBlock* new_pre_header = header->GetDominator(); + DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); + HBasicBlock* if_block = new_pre_header->GetDominator(); + HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0); // True successor. + HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1); // False successor. + + dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); + deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); + new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); + return deopt_block; + } + + // Adds a test between initial_ and end_ to see if the loop body is entered. + // If the loop body isn't entered at all, it jumps to the loop pre-header (after + // transformation) to avoid any deoptimization. + void AddLoopBodyEntryTest() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + HBasicBlock* if_block = pre_header->GetDominator(); + HGraph* graph = header->GetGraph(); + + HCondition* cond; + if (increment_ == 1) { + if (inclusive_) { + cond = new (graph->GetArena()) HGreaterThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + cond = new (graph->GetArena()) HLessThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); + } + } + HIf* h_if = new (graph->GetArena()) HIf(cond); + if_block->AddInstruction(cond); + if_block->AddInstruction(h_if); + } + // Adds a check that (value >= constant), and HDeoptimize otherwise. 
void AddDeoptimizationConstant(HInstruction* value, - int32_t constant) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t constant, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); + if (loop_entry_test_block_added) { + DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1)); + } + HIntConstant* const_instr = graph->GetIntConstant(constant); HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } // Returns true if adding a (value <= array_length + offset) check for deoptimization @@ -640,6 +872,26 @@ class MonotonicValueRange : public ValueRange { int32_t offset, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + + if (array_length->GetBlock() == header) { + // array_length_in_loop_body_if_needed only has correct value when the loop + // body is entered. We bail out in this case. Usually array_length defined + // in the loop header is already hoisted by licm. + return false; + } else { + // array_length is defined either before the loop header already, or in + // the loop body since it's used in the loop body. If it's defined in the loop body, + // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, + // all the uses of array_length must be dominated by its definition in the loop + // body. array_length_in_loop_body_if_needed is guaranteed to be the same as + // array_length once the loop body is entered so all the uses of the phi will + // use the correct value. + } + if (offset > 0) { // There might be overflow issue. // TODO: handle this, possibly with some distance relationship between @@ -667,56 +919,99 @@ class MonotonicValueRange : public ValueRange { // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. 
void AddDeoptimizationArrayLength(HInstruction* value, HArrayLength* array_length, - int32_t offset) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t offset, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(pre_header)) { + if (!array_length->GetBlock()->Dominates(deopt_block)) { + // array_length must be defined in the loop body. + DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); + DCHECK(array_length->GetBlock() != header); + HInstruction* array = array_length->InputAt(0); HNullCheck* null_check = array->AsNullCheck(); if (null_check != nullptr) { array = null_check->InputAt(0); } - // We've already made sure array is defined before the loop when collecting + // We've already made sure the array is defined before the loop when collecting // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(pre_header)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + DCHECK(array->GetBlock()->Dominates(deopt_block)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { // Hoist null check out of loop with a deoptimization. HNullConstant* null_constant = graph->GetNullConstant(); HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); // TODO: for one dex_pc, share the same deoptimization slow path. HDeoptimize* null_check_deoptimize = new (graph->GetArena()) HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore( - null_check_deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_deoptimize, deopt_block->GetLastInstruction()); // Eliminate null check in the loop. null_check->ReplaceWith(array); null_check->GetBlock()->RemoveInstruction(null_check); null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } - // Hoist array_length out of loop. - array_length->MoveBefore(pre_header->GetLastInstruction()); + + HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array); + deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); + + if (loop_entry_test_block_added) { + // Replace array_length defined inside the loop body with a phi + // array_length_in_loop_body_if_needed. This is a synthetic phi so there is + // no vreg number for it. + HPhi* phi = new (graph->GetArena()) HPhi( + graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); + // Set to 0 if the loop body isn't entered. 
+ phi->SetRawInputAt(0, graph->GetIntConstant(0)); + // Set to array.length if the loop body is entered. + phi->SetRawInputAt(1, new_array_length); + pre_header->AddPhi(phi); + array_length->ReplaceWith(phi); + // Make sure phi is only used after the loop body is entered. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> it(phi->GetUses()); + !it.Done(); + it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); + } + } + } else { + array_length->ReplaceWith(new_array_length); + } + + array_length->GetBlock()->RemoveInstruction(array_length); + // Use new_array_length for deopt. + array_length = new_array_length; } - HIntConstant* offset_instr = graph->GetIntConstant(offset); - HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + HInstruction* added = array_length; + if (offset != 0) { + HIntConstant* offset_instr = graph->GetIntConstant(offset); + added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); + } + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); + HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); + deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); } - // Add deoptimizations in loop pre-header with the collected array access + // Adds deoptimizations in loop pre-header with the collected array access // data so that value ranges can be established in loop body. // Returns true if deoptimizations are successfully added, or if it's proven // it's not necessary. @@ -733,70 +1028,60 @@ class MonotonicValueRange : public ValueRange { return false; } + HBasicBlock* deopt_block; + bool loop_entry_test_block_added = false; bool is_constant_proven, is_length_proven; + + HInstruction* const_comparing_instruction; + int32_t const_compared_to; + HInstruction* array_length_comparing_instruction; + int32_t array_length_offset; if (increment_ == 1) { // Increasing from initial_ to end_. - int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; - if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && - CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(initial_, -offset_low); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(end_, array_length, offset); + const_comparing_instruction = initial_; + const_compared_to = -offset_low; + array_length_comparing_instruction = end_; + array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; + } else { + const_comparing_instruction = end_; + const_compared_to = inclusive_ ? 
-offset_low : -offset_low - 1; + array_length_comparing_instruction = initial_; + array_length_offset = -offset_high - 1; + } + + if (CanAddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + &is_constant_proven) && + CanAddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + &is_length_proven)) { + if (!is_constant_proven || !is_length_proven) { + deopt_block = TransformLoopForDeoptimizationIfNeeded(); + loop_entry_test_block_added = (deopt_block != pre_header); + if (loop_entry_test_block_added) { + // Loop body may be entered. + AddLoopBodyEntryTest(); } - return true; } - } else if (increment_ == -1) { - // Decreasing from initial_ to end_. - int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; - if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && - CanAddDeoptimizationArrayLength( - initial_, array_length, -offset_high - 1, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(end_, constant); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); - } - return true; + if (!is_constant_proven) { + AddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + deopt_block, + loop_entry_test_block_added); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + deopt_block, + loop_entry_test_block_added); } + return true; } return false; } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // TODO: possibly handle overflow/underflow issues with deoptimization. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. @@ -819,12 +1104,17 @@ class BCEVisitor : public HGraphVisitor { // it's likely some AIOOBE will be thrown. static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + // Added blocks for loop body entry test. 
+ bool IsAddedBlock(HBasicBlock* block) const { + return block->GetBlockId() >= initial_block_size_; + } + explicit BCEVisitor(HGraph* graph) - : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()), - need_to_revisit_block_(false) {} + : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + DCHECK(!IsAddedBlock(block)); first_constant_index_bounds_check_map_.clear(); HGraphVisitor::VisitBasicBlock(block); if (need_to_revisit_block_) { @@ -839,6 +1129,10 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + if (IsAddedBlock(basic_block)) { + // Added blocks don't keep value ranges. + return nullptr; + } int block_id = basic_block->GetBlockId(); if (maps_.at(block_id) == nullptr) { std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( @@ -853,8 +1147,12 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); - if (map->find(instruction->GetId()) != map->end()) { - return map->Get(instruction->GetId()); + if (map != nullptr) { + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + } else { + DCHECK(IsAddedBlock(basic_block)); } basic_block = basic_block->GetDominator(); } @@ -971,7 +1269,7 @@ class BCEVisitor : public HGraphVisitor { if (left_range != nullptr) { left_monotonic_range = left_range->AsMonotonicValueRange(); if (left_monotonic_range != nullptr) { - HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + HBasicBlock* loop_head = left_monotonic_range->GetLoopHeader(); if (instruction->GetBlock() != loop_head) { // For monotonic value range, don't handle `instruction` // if it's not defined in the loop header. @@ -1013,7 +1311,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondLT); @@ -1047,7 +1345,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. 
if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondGT); @@ -1083,7 +1381,16 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + DCHECK(array_length->IsIntConstant() || + array_length->IsArrayLength() || + array_length->IsPhi()); + + if (array_length->IsPhi()) { + // Input 1 of the phi contains the real array.length once the loop body is + // entered. That value will be used for bound analysis. The graph is still + // strickly in SSA form. + array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); + } if (!index->IsIntConstant()) { ValueRange* index_range = LookupValueRange(index, block); @@ -1238,25 +1545,26 @@ class BCEVisitor : public HGraphVisitor { } if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { // The comparison is for an induction variable in the loop header. DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor; - if (LIKELY(block->GetLoopInformation()-> - Contains(*instruction->IfFalseSuccessor()))) { - loop_body_successor = instruction->IfFalseSuccessor(); - } else { - loop_body_successor = instruction->IfTrueSuccessor(); + HBasicBlock* loop_body_successor = + left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); + if (loop_body_successor == nullptr) { + // In case it's some strange loop structure. + return; } ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if (new_left_range == left_range) { + if ((new_left_range == left_range) || + // Range narrowed with deoptimization is usually more useful than + // a constant range. + new_left_range->IsConstantValueRange()) { // We are not successful in narrowing the monotonic value range to // a regular value range. Try using deoptimization. new_left_range = left_range->AsMonotonicValueRange()-> NarrowWithDeoptimization(); if (new_left_range != left_range) { - GetValueRangeMap(instruction->IfFalseSuccessor())-> - Overwrite(left->GetId(), new_left_range); + GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); } } } @@ -1511,6 +1819,9 @@ class BCEVisitor : public HGraphVisitor { // eliminate those bounds checks. bool need_to_revisit_block_; + // Initial number of blocks. + int32_t initial_block_size_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; @@ -1527,7 +1838,22 @@ void BoundsCheckElimination::Run() { // value can be narrowed further down in the dominator tree. // // TODO: only visit blocks that dominate some array accesses. - visitor.VisitReversePostOrder(); + HBasicBlock* last_visited_block = nullptr; + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == last_visited_block) { + // We may insert blocks into the reverse post order list when processing + // a loop header. Don't process it again. 
+ DCHECK(current->IsLoopHeader()); + continue; + } + if (visitor.IsAddedBlock(current)) { + // Skip added blocks. Their effects are already taken care of. + continue; + } + visitor.VisitBasicBlock(current); + last_visited_block = current; + } } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index e383ec664b..4701bddd48 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -440,22 +440,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -464,6 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -472,6 +467,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -481,6 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); @@ -489,6 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); @@ -579,22 +577,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph2(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -603,6 +595,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -611,6 +604,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); @@ -619,6 +613,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); @@ -710,15 +705,17 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -728,6 +725,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -737,6 +735,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); @@ -828,22 +827,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph4(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -852,6 +845,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -1027,6 +1021,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index cbd042901d..946c0602cf 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -603,7 +603,12 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); bool is_instance_call = invoke_type != kStatic; - size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 
0 : 1); + // Remove the return type from the 'proto'. + size_t number_of_arguments = strlen(descriptor) - 1; + if (is_instance_call) { + // One extra argument for 'this'. + ++number_of_arguments; + } MethodReference target_method(dex_file_, method_idx); uintptr_t direct_code; @@ -614,7 +619,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true, &optimized_invoke_type, &target_method, &table_index, &direct_code, &direct_method)) { - VLOG(compiler) << "Did not compile " << PrettyMethod(method_idx, *dex_file_) + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) << " because a method call could not be resolved"; MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); return false; @@ -723,10 +729,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } - invoke = new (arena_) HInvokeStaticOrDirect( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, string_init_offset, invoke_type, optimized_invoke_type, - clinit_check_requirement); + invoke = new (arena_) HInvokeStaticOrDirect(arena_, + number_of_arguments, + return_type, + dex_pc, + target_method.dex_method_index, + is_recursive, + string_init_offset, + invoke_type, + optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -740,19 +752,29 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, start_index = 1; } - uint32_t descriptor_index = 1; + uint32_t descriptor_index = 1; // Skip the return type. uint32_t argument_index = start_index; if (is_string_init) { start_index = 1; } - for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { + for (size_t i = start_index; + // Make sure we don't go over the expected arguments or over the number of + // dex registers given. If the instruction was seen as dead by the verifier, + // it hasn't been properly checked. + (i < number_of_vreg_arguments) && (argument_index < number_of_arguments); + i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); - if (!is_range && is_wide && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_pc; - // We do not implement non sequential register pair. - MaybeRecordStat(MethodCompilationStat::kNotCompiledNonSequentialRegPair); + if (!is_range + && is_wide + && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { + // Longs and doubles should be in pairs, that is, sequential registers. The verifier should + // reject any class where this is violated. However, the verifier only does these checks + // on non trivially dead instructions, so we just bailout the compilation. + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of non-sequential dex register pair in wide argument"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } HInstruction* arg = LoadLocal(is_range ? 
register_index + i : args[i], type); @@ -761,7 +783,14 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, i++; } } - DCHECK_EQ(argument_index, number_of_arguments); + + if (argument_index != number_of_arguments) { + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of wrong number of arguments in invoke instruction"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + return false; + } if (invoke->IsInvokeStaticOrDirect()) { invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod()); @@ -1206,14 +1235,20 @@ bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { } void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for PackedSwitch contains: + // (a) number of entries (may be zero) + // (b) first and lowest switch case value (entry 0, always present) + // (c) list of target pcs (entries 1 <= i <= N) SwitchTable table(instruction, dex_pc, false); // Value to test against. HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + // Retrieve number of entries. uint16_t num_entries = table.GetNumEntries(); - // There should be at least one entry here. - DCHECK_GT(num_entries, 0U); + if (num_entries == 0) { + return; + } // Chained cmp-and-branch, starting from starting_key. int32_t starting_key = table.GetEntryAt(0); @@ -1225,6 +1260,10 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for SparseSwitch contains: + // (a) number of entries (may be zero) + // (b) sorted key values (entries 0 <= i < N) + // (c) target pcs corresponding to the switch values (entries N <= i < 2*N) SwitchTable table(instruction, dex_pc, true); // Value to test against. 
@@ -1424,21 +1463,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::RETURN: { - DCHECK_NE(return_type_, Primitive::kPrimNot); - DCHECK_NE(return_type_, Primitive::kPrimLong); - DCHECK_NE(return_type_, Primitive::kPrimDouble); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_OBJECT: { - DCHECK(return_type_ == Primitive::kPrimNot); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_WIDE: { - DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong); BuildReturn(instruction, return_type_); break; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 049b3e3a40..09f7d86605 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -236,7 +236,6 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, const GrowableArray<HBasicBlock*>& block_order) { block_order_ = &block_order; DCHECK(block_order_->Get(0) == GetGraph()->GetEntryBlock()); - DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), block_order_->Get(1))); ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; @@ -508,19 +507,14 @@ void CodeGenerator::BuildNativeGCMap( dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); - uint32_t max_native_offset = 0; - for (size_t i = 0; i < pc_infos_.Size(); i++) { - uint32_t native_offset = pc_infos_.Get(i).native_pc; - if (native_offset > max_native_offset) { - max_native_offset = native_offset; - } - } + uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); - GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth()); - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t native_offset = pc_info.native_pc; - uint32_t dex_pc = pc_info.dex_pc; + size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth()); + for (size_t i = 0; i != num_stack_maps; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t native_offset = stack_map_entry.native_pc_offset; + uint32_t dex_pc = stack_map_entry.dex_pc; const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc; builder.AddEntry(native_offset, references); @@ -528,17 +522,17 @@ void CodeGenerator::BuildNativeGCMap( } void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t pc2dex_offset = pc_info.native_pc; - int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t pc2dex_offset = stack_map_entry.native_pc_offset; + int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc; src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); } } void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; - uint32_t pc2dex_entries = pc_infos_.Size(); + uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps(); uint32_t pc2dex_offset = 
0u; int32_t pc2dex_dalvik_offset = 0; uint32_t dex2pc_data_size = 0u; @@ -547,11 +541,11 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { int32_t dex2pc_dalvik_offset = 0; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); - pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset); + pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } // Walk over the blocks and find which ones correspond to catch block entries. @@ -586,12 +580,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { dex2pc_dalvik_offset = 0u; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - DCHECK(pc2dex_offset <= pc_info.native_pc); - write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset); - write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset); + write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset); + write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -617,9 +611,9 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { auto it = table.PcToDexBegin(); auto it2 = table.DexToPcBegin(); for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - CHECK_EQ(pc_info.native_pc, it.NativePcOffset()); - CHECK_EQ(pc_info.dex_pc, it.DexPc()); + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset()); + CHECK_EQ(stack_map_entry.dex_pc, it.DexPc()); ++it; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -695,14 +689,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // Collect PC infos for the mapping table. - struct PcInfo pc_info; - pc_info.dex_pc = outer_dex_pc; - pc_info.native_pc = GetAssembler()->CodeSize(); - pc_infos_.Add(pc_info); + uint32_t native_pc = GetAssembler()->CodeSize(); if (instruction == nullptr) { // For stack overflow checks. - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0); + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); stack_map_stream_.EndStackMapEntry(); return; } @@ -719,8 +710,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. 
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, - pc_info.native_pc, + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, + native_pc, register_mask, locations->GetStackMask(), outer_environment_size, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index c6ebf6dbd8..5b0abd76b3 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -64,11 +64,6 @@ class CodeAllocator { DISALLOW_COPY_AND_ASSIGN(CodeAllocator); }; -struct PcInfo { - uint32_t dex_pc; - uintptr_t native_pc; -}; - class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: SlowPathCode() { @@ -363,16 +358,15 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + stack_map_stream_(graph->GetArena()), + block_order_(nullptr), is_baseline_(false), graph_(graph), compiler_options_(compiler_options), - pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), - block_order_(nullptr), current_block_index_(0), is_leaf_(true), - requires_current_method_(false), - stack_map_stream_(graph->GetArena()) {} + requires_current_method_(false) {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -442,6 +436,11 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + StackMapStream stack_map_stream_; + + // The order to use for code generation. + const GrowableArray<HBasicBlock*>* block_order_; + // Whether we are using baseline. bool is_baseline_; @@ -455,12 +454,8 @@ class CodeGenerator { HGraph* const graph_; const CompilerOptions& compiler_options_; - GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; - // The order to use for code generation. - const GrowableArray<HBasicBlock*>* block_order_; - // The current block index in `block_order_` of the block // we are generating code for. size_t current_block_index_; @@ -471,8 +466,6 @@ class CodeGenerator { // Whether an instruction in the graph accesses the current method. bool requires_current_method_; - StackMapStream stack_map_stream_; - friend class OptimizingCFITest; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3d3e35d0fc..f6ae45238c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -392,12 +392,38 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(false /* can_relocate_branches */), + assembler_(), isa_features_(isa_features) { // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } +void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + // Adjust native pc offsets of block labels. 
+ for (size_t block_idx = 0u, end = block_order_->Size(); block_idx != end; ++block_idx) { + HBasicBlock* block = block_order_->Get(block_idx); + // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid + // FirstNonEmptyBlock() which could lead to adjusting a label more than once. + DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); + Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_EQ(block_label->IsBound(), !block->IsSingleGoto()); + if (block_label->IsBound()) { + __ AdjustLabelPosition(block_label); + } + } + + CodeGenerator::Finalize(allocator); +} + Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { switch (type) { case Primitive::kPrimLong: { @@ -2831,7 +2857,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - NearLabel less, greater, done; + Label less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2927,7 +2953,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register temp1, Register temp2, HInstruction* instruction) { - NearLabel fail; + Label fail; if (offset != 0) { __ LoadImmediate(temp1, offset); __ add(IP, addr, ShifterOperand(temp1)); @@ -3607,7 +3633,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp, Register object, Register value, bool can_be_null) { - NearLabel is_null; + Label is_null; if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } @@ -4036,7 +4062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { Register cls = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - NearLabel done, zero; + Label done, zero; SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -4093,19 +4119,15 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - NearLabel done; // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, slow_path->GetExitLabel()); } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); __ b(slow_path->GetEntryLabel(), NE); __ Bind(slow_path->GetExitLabel()); - if (instruction->MustDoNullCheck()) { - __ Bind(&done); - } } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index af2481661a..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -139,10 +139,16 @@ class LocationsBuilderARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -163,10 +169,16 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + ArmAssembler* GetAssembler() const { return assembler_; } private: @@ -286,6 +298,8 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + void Finalize(CodeAllocator* allocator) OVERRIDE; + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2d2419a284..f96810ff80 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -147,9 +147,17 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + Arm64Assembler* GetAssembler() const { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } @@ -188,9 +196,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable 
instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index faf3cf9ffa..696d8d549e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -124,10 +124,16 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); @@ -148,10 +154,16 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e46994c79e..215754cd46 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -134,10 +134,16 @@ class LocationsBuilderX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -158,10 +164,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86_64Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 17a006cc3a..fdfe518e95 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ 
b/compiler/optimizing/dead_code_elimination.cc @@ -122,10 +122,6 @@ void HDeadCodeElimination::RemoveDeadInstructions() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() - // The current method needs to stay in the graph in case of inlining. - // It is always passed anyway, and keeping it in the graph does not - // affect the generated code. - && !inst->IsCurrentMethod() // If we added an explicit barrier then we should keep it. && !inst->IsMemoryBarrier() && !inst->HasUses()) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index fd2e4e81df..b64791788d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -21,6 +21,7 @@ #include "licm.h" #include "nodes.h" #include "optimization.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" @@ -354,6 +355,24 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } + } else if (IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + && is_after_pass_) { + if (instruction->GetType() == Primitive::kPrimNot) { + if (instruction->IsLoadClass()) { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") + << PrettyClass(instruction->AsLoadClass()->GetLoadedClassRTI().GetTypeHandle().Get()); + } else { + ReferenceTypeInfo info = instruction->GetReferenceTypeInfo(); + if (info.IsTop()) { + StartAttributeStream("klass") << "java.lang.Object"; + } else { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get()); + } + StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; + } + } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index c3fc33735a..92ebf060eb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -27,6 +27,7 @@ #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" @@ -57,7 +58,7 @@ void HInliner::Run() { next_block = (i == blocks.Size() - 1) ? nullptr : blocks.Get(i + 1); for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvokeStaticOrDirect* call = instruction->AsInvokeStaticOrDirect(); + HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method @@ -83,6 +84,93 @@ void HInliner::Run() { } } +static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return method->IsFinal() || method->GetDeclaringClass()->IsFinal(); +} + +/** + * Given the `resolved_method` looked up in the dex cache, try to find + * the actual runtime target of an interface or virtual call. + * Return nullptr if the runtime target cannot be proven. + */ +static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // No need to lookup further, the resolved method will be the target. 
+ return resolved_method; + } + + HInstruction* receiver = invoke->InputAt(0); + if (receiver->IsNullCheck()) { + // Due to multiple levels of inlining within the same pass, it might be that + // the null check does not have the reference type of the actual receiver. + receiver = receiver->InputAt(0); + } + ReferenceTypeInfo info = receiver->GetReferenceTypeInfo(); + if (info.IsTop()) { + // We have no information on the receiver. + return nullptr; + } else if (!info.IsExact()) { + // We currently only support inlining with known receivers. + // TODO: Remove this check, we should be able to inline final methods + // on unknown receivers. + return nullptr; + } else if (info.GetTypeHandle()->IsInterface()) { + // Statically knowing that the receiver has an interface type cannot + // help us find the target method. + return nullptr; + } else if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(info.GetTypeHandle().Get())) { + // The method that we're trying to call is not in the receiver's class or super classes. + return nullptr; + } + + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + size_t pointer_size = cl->GetImagePointerSize(); + if (invoke->IsInvokeInterface()) { + resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface( + resolved_method, pointer_size); + } else { + DCHECK(invoke->IsInvokeVirtual()); + resolved_method = info.GetTypeHandle()->FindVirtualMethodForVirtual( + resolved_method, pointer_size); + } + + if (resolved_method == nullptr) { + // The information we had on the receiver was not enough to find + // the target method. Since we checked the exact type of the receiver above, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (resolved_method->IsAbstract()) { + // The information we had on the receiver was not enough to find + // the target method. Since we checked the exact type of the receiver above, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // A final method has to be the target method. + return resolved_method; + } else if (info.IsExact()) { + // If we found a method and the receiver's concrete type is statically + // known, we know for sure the target. + return resolved_method; + } else { + // Even if we did find a method, the receiver type was not enough to + // statically find the runtime target. 
+ return nullptr; + } +} + +static uint32_t FindMethodIndexIn(ArtMethod* method, + const DexFile& dex_file, + uint32_t referrer_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + return method->GetDexMethodIndex(); + } else { + return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + } +} + bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); @@ -99,6 +187,25 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); + if (resolved_method == nullptr) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(method_index, caller_dex_file) + << " could not be statically determined"; + return false; + } + // We have found a method, but we need to find where that method is for the caller's + // dex file. + method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(resolved_method) + << " cannot be inlined because it is inaccessible to the caller"; + return false; + } + } + bool same_dex_file = true; const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile(); if (resolved_method->GetDexFile()->GetLocation().compare(outer_dex_file.GetLocation()) != 0) { @@ -149,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) { + if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { return false; } @@ -160,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); DexCompilationUnit dex_compilation_unit( nullptr, @@ -204,7 +311,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } HGraph* callee_graph = new (graph_->GetArena()) HGraph( graph_->GetArena(), - caller_dex_file, + callee_dex_file, method_index, requires_ctor_barrier, compiler_driver_->GetInstructionSet(), @@ -221,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &inline_stats); if (!builder.BuildGraph(*code_item)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be built, so cannot be inlined"; // There could be multiple reasons why the graph could not be built, including // inaccessible methods/fields due to using a different dex cache. 
We do not mark @@ -231,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " cannot be inlined because of the register allocator"; resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; resolved_method->SetShouldNotInline(); return false; @@ -247,11 +354,13 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // Run simple optimizations on the graph. HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); + ReferenceTypePropagation type_propagation(callee_graph, handles_); InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &dce, &fold, + &type_propagation, &simplify, }; @@ -265,6 +374,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, outer_compilation_unit_, dex_compilation_unit, compiler_driver_, + handles_, stats_, depth_ + 1); inliner.Run(); @@ -275,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an infinite loop"; resolved_method->SetShouldNotInline(); return false; @@ -289,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because one branch always throws"; resolved_method->SetShouldNotInline(); return false; @@ -300,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); return false; @@ -314,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the // resolution conflict is currently too high. 
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an interface call."; resolved_method->SetShouldNotInline(); return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCache()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; // Do not flag the method as not-inlineable. A caller within the same diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index f7d8cf8715..24044b73a1 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -34,13 +34,15 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, CompilerDriver* compiler_driver, + StackHandleScopeCollection* handles, OptimizingCompilerStats* stats, size_t depth = 0) : HOptimization(outer_graph, true, kInlinerPassName, stats), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), compiler_driver_(compiler_driver), - depth_(depth) {} + depth_(depth), + handles_(handles) {} void Run() OVERRIDE; @@ -50,13 +52,13 @@ class HInliner : public HOptimization { bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CompilerDriver* const compiler_driver_; const size_t depth_; + StackHandleScopeCollection* const handles_; DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fcb3471821..2daeeb3c0c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -186,33 +186,94 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins return false; } -void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (!check_cast->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { - check_cast->ClearMustDoNullCheck(); - } - - if (!load_class->IsResolved()) { +// Returns whether doing a type test between the class of `object` against `klass` has +// a statically known outcome. The result of the test is stored in `outcome`. +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { + if (!klass->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type // and thus producing wrong results. 
- return; + return false; } - ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (class_rti.IsSupertypeOf(obj_rti)) { + *outcome = true; + return true; + } else if (obj_rti.IsExact()) { + // The test failed at compile time so will also fail at runtime. + *outcome = false; + return true; + } else if (!class_rti.IsInterface() + && !obj_rti.IsInterface() + && !obj_rti.IsSupertypeOf(class_rti)) { + // Different type hierarchy. The test will fail. + *outcome = false; + return true; + } + return false; +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* object = check_cast->InputAt(0); + if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + check_cast->ClearMustDoNullCheck(); + } + + if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } + } else { + // Don't do anything for exceptional cases for now. Ideally we should remove + // all instructions and blocks this instruction dominates. + } } } void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { - if (!instruction->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + HInstruction* object = instruction->InputAt(0); + bool can_be_null = true; + if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + can_be_null = false; instruction->ClearMustDoNullCheck(); } + + HGraph* graph = GetGraph(); + if (object->IsNullConstant()) { + instruction->ReplaceWith(graph->GetIntConstant(0)); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome && can_be_null) { + // Type test will succeed, we just need a null test. + HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + instruction->GetBlock()->InsertInstructionBefore(test, instruction); + instruction->ReplaceWith(test); + } else { + // We've statically determined the result of the instanceof. 
+ instruction->ReplaceWith(graph->GetIntConstant(outcome)); + } + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + } } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 024462081f..668956a614 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd91d2c87b..68c197e607 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -288,7 +288,10 @@ void HGraph::InsertConstant(HConstant* constant) { } HNullConstant* HGraph::GetNullConstant() { - if (cached_null_constant_ == nullptr) { + // For simplicity, don't bother reviving the cached null constant if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { cached_null_constant_ = new (arena_) HNullConstant(); InsertConstant(cached_null_constant_); } @@ -296,7 +299,10 @@ HNullConstant* HGraph::GetNullConstant() { } HCurrentMethod* HGraph::GetCurrentMethod() { - if (cached_current_method_ == nullptr) { + // For simplicity, don't bother reviving the cached current method if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_current_method_ == nullptr) || (cached_current_method_->GetBlock() == nullptr)) { cached_current_method_ = new (arena_) HCurrentMethod( Is64BitInstructionSet(instruction_set_) ? Primitive::kPrimLong : Primitive::kPrimInt); if (entry_block_->GetFirstInstruction() == nullptr) { @@ -1510,6 +1516,81 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +/* + * Loop will be transformed to: + * old_pre_header + * | + * if_block + * / \ + * dummy_block deopt_block + * \ / + * new_pre_header + * | + * header + */ +void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + + // Need this to avoid critical edge. + HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + // Need this to avoid critical edge. 
+ HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(if_block); + AddBlock(dummy_block); + AddBlock(deopt_block); + AddBlock(new_pre_header); + + header->ReplacePredecessor(pre_header, new_pre_header); + pre_header->successors_.Reset(); + pre_header->dominated_blocks_.Reset(); + + pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(dummy_block); // True successor + if_block->AddSuccessor(deopt_block); // False successor + dummy_block->AddSuccessor(new_pre_header); + deopt_block->AddSuccessor(new_pre_header); + + pre_header->dominated_blocks_.Add(if_block); + if_block->SetDominator(pre_header); + if_block->dominated_blocks_.Add(dummy_block); + dummy_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(deopt_block); + deopt_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(new_pre_header); + new_pre_header->SetDominator(if_block); + new_pre_header->dominated_blocks_.Add(header); + header->SetDominator(new_pre_header); + + size_t index_of_header = 0; + while (reverse_post_order_.Get(index_of_header) != header) { + index_of_header++; + } + MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); + reverse_post_order_.Put(index_of_header++, if_block); + reverse_post_order_.Put(index_of_header++, dummy_block); + reverse_post_order_.Put(index_of_header++, deopt_block); + reverse_post_order_.Put(index_of_header++, new_pre_header); + + HLoopInformation* info = pre_header->GetLoopInformation(); + if (info != nullptr) { + if_block->SetLoopInformation(info); + dummy_block->SetLoopInformation(info); + deopt_block->SetLoopInformation(info); + new_pre_header->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*pre_header); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(if_block); + loop_it.Current()->Add(dummy_block); + loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(new_pre_header); + } + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f87775e195..9443653db7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Need to add a couple of blocks to test if the loop body is entered and + // put deoptimization instructions, etc. + void TransformLoopHeaderForBCE(HBasicBlock* header); + // Removes `block` from the graph. void DeleteDeadBlock(HBasicBlock* block); @@ -331,6 +335,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { } // If not found or previously deleted, create and cache a new instruction. + // Don't bother reviving a previously deleted instruction, for simplicity. 
if (constant == nullptr || constant->GetBlock() == nullptr) { constant = new (arena_) InstructionType(value); cache->Overwrite(value, constant); @@ -824,7 +829,7 @@ class HLoopInformationOutwardIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator); }; -#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -894,6 +899,21 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ @@ -1281,6 +1301,9 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } bool IsTop() const { return is_top_; } + bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !IsTop() && GetTypeHandle()->IsInterface(); + } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } @@ -2461,7 +2484,7 @@ class HInvoke : public HInstruction { intrinsic_ = intrinsic; } - bool IsInlined() const { + bool IsFromInlinedInvoke() const { return GetEnvironment()->GetParent() != nullptr; } @@ -3581,7 +3604,7 @@ class HLoadClass : public HExpression<1> { bool CanThrow() const OVERRIDE { // May call runtime and therefore can throw. // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -4246,6 +4269,39 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +// Iterator over the blocks that are part of the loop. Includes blocks part +// of an inner loop. The order in which the blocks are iterated is reverse +// post order. +class HBlocksInLoopReversePostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopReversePostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(0) { + if (!blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ == blocks_.Size(); } + HBasicBlock* Current() const { return blocks_.Get(index_); } + void Advance() { + ++index_; + for (size_t e = blocks_.Size(); index_ < e; ++index_) { + if (blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const GrowableArray<HBasicBlock*>& blocks_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); +}; + inline int64_t Int64FromConstant(HConstant* constant) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); return constant->IsIntConstant() ? 
constant->AsIntConstant()->GetValue() diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9f6a..2d1c0ba9f9 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. */ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b0d1433667..fe3bb1a2b4 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -71,6 +71,8 @@ class OptimizingCFITest : public CFITest { } } } + GrowableArray<HBasicBlock*> blocks(&allocator, 0); + code_gen->block_order_ = &blocks; code_gen->ComputeSpillMask(); code_gen->SetFrameSize(frame_size); code_gen->GenerateFrameEntry(); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f6ef2f7e82..8d43adaada 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -318,43 +318,61 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - - IntrinsicsRecognizer intrinsics(graph, driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* 
type_propagation = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_bce"); + ReferenceTypePropagation* type_propagation2 = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_before_codegen"); + + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations[] = { - &intrinsics, - &dce1, - &fold1, - &simplify1, - &inliner, + intrinsics, + fold1, + simplify1, + type_propagation, + dce1, + simplify2, + inliner, + // Run another type propagation phase: inlining will open up more opportunities + // to remove checkcast/instanceof and null checks. + type_propagation2, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &type_propagation, - &simplify2, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + simplify3, + dce2, + // The codegen has a few assumptions that only the instruction simplifier can + // satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + simplify4, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b6b1bb1cad..53d052b2bc 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -19,6 +19,7 @@ #include <sstream> #include <string> +#include <type_traits> #include "atomic.h" @@ -37,8 +38,8 @@ enum MethodCompilationStat { kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, + kNotCompiledMalformedOpcode, kNotCompiledNoCodegen, - kNotCompiledNonSequentialRegPair, kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, @@ -84,14 +85,15 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + << compile_stats_[i]; } } } } private: - std::string PrintMethodCompilationStat(int stat) const { + std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { switch (stat) { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; @@ -105,8 +107,8 @@ class OptimizingCompilerStats { case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; + case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return 
"kNotCompiledUnhandledInstruction"; @@ -120,9 +122,12 @@ class OptimizingCompilerStats { case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - default: LOG(FATAL) << "invalid stat"; + + case kLastStat: break; // Invalid to print out. } - return ""; + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index a249aa9711..ca928ae0f2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) { - last_input->SetMustGenerateClinitCheck(false); - } - // Remove a load class instruction as last input of a static // invoke, which has been added (along with a clinit check, // removed by PrepareForRegisterAllocation::VisitClinitCheck @@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // stage (i.e., after inlining has been performed). invoke->RemoveLoadClassAsLastInput(); - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) { - last_input->GetBlock()->RemoveInstruction(last_input); + // The static call will initialize the class so there's no need for a clinit check if + // it's the first user. + // There is one special case where we still need the clinit check, when inlining. Because + // currently the callee is responsible for reporting parameters to the GC, the code + // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. + // Therefore we cannot allocate any object in that code, including loading a new class. + if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { + last_input->SetMustGenerateClinitCheck(false); + + // If the load class instruction is no longer used, remove it from + // the graph. 
+ if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4f1f45769d..a048c856c5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -23,6 +23,30 @@ namespace art { +class RTPVisitor : public HGraphDelegateVisitor { + public: + RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + : HGraphDelegateVisitor(graph), + handles_(handles) {} + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; + void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitNewArray(HNewArray* instr) OVERRIDE; + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); + void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; + void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitInvoke(HInvoke* instr) OVERRIDE; + void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact); + + private: + StackHandleScopeCollection* handles_; +}; + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -35,23 +59,13 @@ void ReferenceTypePropagation::Run() { void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // TODO: handle other instructions that give type info - // (Call/array accesses) + // (array accesses) + RTPVisitor visitor(graph_, handles_); // Initialize exact types first for faster convergence. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - // TODO: Make ReferenceTypePropagation a visitor or create a new one. - if (instr->IsNewInstance()) { - VisitNewInstance(instr->AsNewInstance()); - } else if (instr->IsLoadClass()) { - VisitLoadClass(instr->AsLoadClass()); - } else if (instr->IsNewArray()) { - VisitNewArray(instr->AsNewArray()); - } else if (instr->IsInstanceFieldGet()) { - VisitInstanceFieldGet(instr->AsInstanceFieldGet()); - } else if (instr->IsStaticFieldGet()) { - VisitStaticFieldGet(instr->AsStaticFieldGet()); - } + instr->Accept(&visitor); } // Handle Phis. 
@@ -166,35 +180,39 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } } -void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass) { +void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, + mirror::Class* klass, + bool is_exact) { if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); - instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + is_exact = is_exact || klass->IsFinal(); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); } } -void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file) { +void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact) { DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file); // Get type from dex cache assuming it was populated by the verifier. - SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx)); + SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewInstance(HNewInstance* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewArray(HNewArray* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, - const FieldInfo& info) { +void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, + const FieldInfo& info) { // The field index is unknown only during tests. 
if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { return; @@ -206,18 +224,18 @@ void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache); DCHECK(field != nullptr); mirror::Class* klass = field->GetType<false>(); - SetClassAsTypeInfo(instr, klass); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } -void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) { +void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) { +void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { +void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); @@ -295,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); } +void RTPVisitor::VisitInvoke(HInvoke* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); +} + +void RTPVisitor::VisitArrayGet(HArrayGet* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + HInstruction* parent = instr->InputAt(0); + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); + if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { + SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + } +} + void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); // Be sure that we don't go over the bounded type. 
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 74e425fb3e..0d687d25cb 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -40,23 +40,12 @@ class ReferenceTypePropagation : public HOptimization {
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
-  void VisitNewInstance(HNewInstance* new_instance);
-  void VisitLoadClass(HLoadClass* load_class);
-  void VisitNewArray(HNewArray* instr);
   void VisitPhi(HPhi* phi);
   void VisitBasicBlock(HBasicBlock* block);
-  void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
-  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass);
-
   void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void BoundTypeForIfNotNull(HBasicBlock* block);
   void BoundTypeForIfInstanceOf(HBasicBlock* block);
-  void UpdateReferenceTypeInfo(HInstruction* instr, uint16_t type_idx, const DexFile& dex_file);
-  void VisitInstanceFieldGet(HInstanceFieldGet* instr);
-  void VisitStaticFieldGet(HStaticFieldGet* instr);
-
   void ProcessWorklist();
   void AddToWorklist(HInstruction* instr);
   void AddDependentInstructionsToWorklist(HInstruction* instr);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c4612af393..2a86e60e14 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() {
     }
 
     HInstruction* left = equality_instr->InputAt(0);
     HInstruction* right = equality_instr->InputAt(1);
-    HInstruction* null_instr = nullptr;
+    HInstruction* int_operand = nullptr;
 
-    if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) {
-      null_instr = right;
-    } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) {
-      null_instr = left;
+    if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) {
+      int_operand = right;
+    } else if ((right->GetType() == Primitive::kPrimNot)
+               && (left->GetType() == Primitive::kPrimInt)) {
+      int_operand = left;
     } else {
       continue;
     }
 
     // If we got here, we are comparing against a reference and the int constant
     // should be replaced with a null constant.
-    if (null_instr->IsIntConstant()) {
-      DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue());
-      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0);
-    }
+    // Both type propagation and redundant phi elimination ensure `int_operand`
+    // can only be the 0 constant.
+    DCHECK(int_operand->IsIntConstant());
+    DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
+    equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
   }
 }
@@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() {
   PrimitiveTypePropagation type_propagation(GetGraph());
   type_propagation.Run();
 
-  // 5) Fix the type for null constants which are part of an equality comparison.
-  FixNullConstantType();
-
-  // 6) When creating equivalent phis we copy the inputs of the original phi which
-  //    may be improperly typed. This will be fixed during the type propagation but
+  // 5) When creating equivalent phis we copy the inputs of the original phi which
+  //    may be improperly typed. This was fixed during the type propagation in 4) but
   //    as a result we may end up with two equivalent phis with the same type for
   //    the same dex register. This pass cleans them up.
   EquivalentPhisCleanup();
 
-  // 7) Mark dead phis again. Step 4) may have introduced new phis.
-  //    Step 6) might enable the death of new phis.
+  // 6) Mark dead phis again. Step 4) may have introduced new phis.
+  //    Step 5) might enable the death of new phis.
   SsaDeadPhiElimination dead_phis(GetGraph());
   dead_phis.MarkDeadPhis();
 
-  // 8) Now that the graph is correctly typed, we can get rid of redundant phis.
+  // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
   //    Note that we cannot do this phase before type propagation, otherwise
   //    we could get rid of phi equivalents, whose presence is a requirement for the
   //    type propagation phase. Note that this is to satisfy statement (a) of the
@@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() {
   SsaRedundantPhiElimination redundant_phi(GetGraph());
   redundant_phi.Run();
 
+  // 8) Fix the type for null constants which are part of an equality comparison.
+  //    We need to do this after redundant phi elimination, to ensure the only cases
+  //    that we can see are reference comparison against 0. The redundant phi
+  //    elimination ensures we do not see a phi taking two 0 constants in a HEqual
+  //    or HNotEqual.
+  FixNullConstantType();
+
   // 9) Make sure environments use the right phi "equivalent": a phi marked dead
   //    can have a phi equivalent that is not dead. We must therefore update
   //    all environment uses of the dead phi to use its equivalent. Note that there
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 42b9182d55..65610d54a6 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -49,7 +49,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
   }
 
   dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
-  native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
   register_mask_max_ = std::max(register_mask_max_, register_mask);
   current_dex_register_ = 0;
 }
@@ -128,16 +127,25 @@ void StackMapStream::EndInlineInfoEntry() {
   current_inline_info_ = InlineInfoEntry();
 }
 
+uint32_t StackMapStream::ComputeMaxNativePcOffset() const {
+  uint32_t max_native_pc_offset = 0u;
+  for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) {
+    max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset);
+  }
+  return max_native_pc_offset;
+}
+
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
   stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
   inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
   stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_,
                                                           inline_info_size_,
                                                           dex_register_maps_size_,
                                                           dex_pc_max_,
-                                                          native_pc_offset_max_,
+                                                          max_native_pc_offset,
                                                           register_mask_max_);
   stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize();
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 274d573350..550ed70e0f 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -67,7 +67,6 @@ class StackMapStream : public ValueObject {
         inline_infos_(allocator, 2),
         stack_mask_max_(-1),
         dex_pc_max_(0),
-        native_pc_offset_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
@@ -126,6 +125,22 @@ class StackMapStream : public ValueObject {
                             uint32_t num_dex_registers);
   void EndInlineInfoEntry();
 
+  size_t GetNumberOfStackMaps() const {
+    return stack_maps_.Size();
+  }
+
+  const StackMapEntry& GetStackMap(size_t i) const {
+    DCHECK_LT(i, stack_maps_.Size());
+    return stack_maps_.GetRawStorage()[i];
+  }
+
+  void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
+    DCHECK_LT(i, stack_maps_.Size());
+    stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset;
+  }
+
+  uint32_t ComputeMaxNativePcOffset() const;
+
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
   size_t PrepareForFillIn();
@@ -163,7 +178,6 @@ class StackMapStream : public ValueObject {
   GrowableArray<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
-  uint32_t native_pc_offset_max_;
   uint32_t register_mask_max_;
   size_t number_of_stack_maps_with_inline_info_;
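
The stack_map_stream.cc/.h hunks above drop the incrementally tracked native_pc_offset_max_ in favour of an on-demand ComputeMaxNativePcOffset(), and add SetStackMapNativePcOffset() so entries can be patched after they are recorded; recomputing the maximum in PrepareForFillIn() presumably keeps the chosen encoding correct even when offsets change after BeginStackMapEntry(). Below is a minimal standalone sketch of that ordering; it uses std::vector and a simplified StackMapEntry rather than ART's GrowableArray, arena allocators, or real entry layout, so all names in it are illustrative assumptions, not the actual ART API.

// Standalone sketch (not ART code): why a maximum cached at insertion time is
// replaced by an on-demand scan once entries can be patched after the fact.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

namespace sketch {

struct StackMapEntry {
  uint32_t dex_pc = 0;
  uint32_t native_pc_offset = 0;
};

class StackMapStream {
 public:
  void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset) {
    stack_maps_.push_back({dex_pc, native_pc_offset});
  }

  size_t GetNumberOfStackMaps() const { return stack_maps_.size(); }

  // Entries may be rewritten later, e.g. when code is moved or relaxed.
  void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
    assert(i < stack_maps_.size());
    stack_maps_[i].native_pc_offset = native_pc_offset;
  }

  // Scans all recorded entries; a value cached in BeginStackMapEntry could be
  // stale by the time the encoding sizes are chosen.
  uint32_t ComputeMaxNativePcOffset() const {
    uint32_t max_offset = 0u;
    for (const StackMapEntry& entry : stack_maps_) {
      max_offset = std::max(max_offset, entry.native_pc_offset);
    }
    return max_offset;
  }

 private:
  std::vector<StackMapEntry> stack_maps_;
};

}  // namespace sketch

int main() {
  sketch::StackMapStream stream;
  stream.BeginStackMapEntry(/* dex_pc */ 0, /* native_pc_offset */ 16);
  stream.BeginStackMapEntry(/* dex_pc */ 4, /* native_pc_offset */ 32);
  // Patch an offset after recording; a running maximum taken at insertion
  // time would still report 32.
  stream.SetStackMapNativePcOffset(1, 128);
  std::cout << stream.ComputeMaxNativePcOffset() << "\n";  // Prints 128.
  return 0;
}

Under these assumptions, a maximum tracked in BeginStackMapEntry would report 32 in this example, while the scan performed just before choosing the encoding reports the patched value 128.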