Diffstat (limited to 'compiler/optimizing')
72 files changed, 10316 insertions, 3619 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index f985745e7a..f0cafc847f 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne(); } -// Returns an instruction with the opposite boolean value from 'cond'. -static HInstruction* GetOppositeCondition(HInstruction* cond) { - HGraph* graph = cond->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetArena(); - - if (cond->IsCondition()) { - HInstruction* lhs = cond->InputAt(0); - HInstruction* rhs = cond->InputAt(1); - switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* - case kCondEQ: return new (allocator) HEqual(lhs, rhs); - case kCondNE: return new (allocator) HNotEqual(lhs, rhs); - case kCondLT: return new (allocator) HLessThan(lhs, rhs); - case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs); - case kCondGT: return new (allocator) HGreaterThan(lhs, rhs); - case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs); - case kCondB: return new (allocator) HBelow(lhs, rhs); - case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs); - case kCondA: return new (allocator) HAbove(lhs, rhs); - case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs); - } - } else if (cond->IsIntConstant()) { - HIntConstant* int_const = cond->AsIntConstant(); - if (int_const->IsZero()) { - return graph->GetIntConstant(1); - } else { - DCHECK(int_const->IsOne()); - return graph->GetIntConstant(0); - } - } - // General case when 'cond' is another instruction of type boolean, - // as verified by SSAChecker. - return new (allocator) HBooleanNot(cond); -} - void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { DCHECK(block->EndsWithIf()); @@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } + replacement = graph_->InsertOppositeCondition(if_condition, if_instruction); } else if (PreservesCondition(true_value, false_value)) { replacement = if_condition; } else { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index bcc32403d3..a44830207d 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -20,6 +20,7 @@ #include "base/arena_containers.h" #include "induction_var_range.h" +#include "side_effects_analysis.h" #include "nodes.h" namespace art { @@ -175,6 +176,24 @@ class ValueBound : public ValueObject { return false; } + // Returns if it's certain this->bound > `bound`. + bool GreaterThan(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ > bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound < `bound`. + bool LessThan(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ < bound.constant_; + } + // Not comparable. Just return false. + return false; + } + // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. 
static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { @@ -252,157 +271,6 @@ class ValueBound : public ValueObject { int32_t constant_; }; -// Collect array access data for a loop. -// TODO: make it work for multiple arrays inside the loop. -class ArrayAccessInsideLoopFinder : public ValueObject { - public: - explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) - : induction_variable_(induction_variable), - found_array_length_(nullptr), - offset_low_(std::numeric_limits<int32_t>::max()), - offset_high_(std::numeric_limits<int32_t>::min()) { - Run(); - } - - HArrayLength* GetFoundArrayLength() const { return found_array_length_; } - bool HasFoundArrayLength() const { return found_array_length_ != nullptr; } - int32_t GetOffsetLow() const { return offset_low_; } - int32_t GetOffsetHigh() const { return offset_high_; } - - // Returns if `block` that is in loop_info may exit the loop, unless it's - // the loop header for loop_info. - static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) { - DCHECK(loop_info->Contains(*block)); - if (block == loop_info->GetHeader()) { - // Loop header of loop_info. Exiting loop is normal. - return false; - } - for (HBasicBlock* successor : block->GetSuccessors()) { - if (!loop_info->Contains(*successor)) { - // One of the successors exits the loop. - return true; - } - } - return false; - } - - static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { - for (HBasicBlock* back_edge : loop_info->GetBackEdges()) { - if (!block->Dominates(back_edge)) { - return false; - } - } - return true; - } - - void Run() { - HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); - HBasicBlock* block = it_loop.Current(); - DCHECK(block == induction_variable_->GetBlock()); - // Skip loop header. Since narrowed value range of a MonotonicValueRange only - // applies to the loop body (after the test at the end of the loop header). - it_loop.Advance(); - for (; !it_loop.Done(); it_loop.Advance()) { - block = it_loop.Current(); - DCHECK(block->IsInLoop()); - if (!DominatesAllBackEdges(block, loop_info)) { - // In order not to trigger deoptimization unnecessarily, make sure - // that all array accesses collected are really executed in the loop. - // For array accesses in a branch inside the loop, don't collect the - // access. The bounds check in that branch might not be eliminated. - continue; - } - if (EarlyExit(block, loop_info)) { - // If the loop body can exit loop (like break, return, etc.), it's not guaranteed - // that the loop will loop through the full monotonic value range from - // initial_ to end_. So adding deoptimization might be too aggressive and can - // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. - found_array_length_ = nullptr; - return; - } - for (HInstruction* instruction = block->GetFirstInstruction(); - instruction != nullptr; - instruction = instruction->GetNext()) { - if (!instruction->IsBoundsCheck()) { - continue; - } - - HInstruction* length_value = instruction->InputAt(1); - if (length_value->IsIntConstant()) { - // TODO: may optimize for constant case. - continue; - } - - if (length_value->IsPhi()) { - // When adding deoptimizations in outer loops, we might create - // a phi for the array length, and update all uses of the - // length in the loop to that phi. 
Therefore, inner loops having - // bounds checks on the same array will use that phi. - // TODO: handle these cases. - continue; - } - - DCHECK(length_value->IsArrayLength()); - HArrayLength* array_length = length_value->AsArrayLength(); - - HInstruction* array = array_length->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (loop_info->Contains(*array->GetBlock())) { - // Array is defined inside the loop. Skip. - continue; - } - - if (found_array_length_ != nullptr && found_array_length_ != array_length) { - // There is already access for another array recorded for the loop. - // TODO: handle multiple arrays. - continue; - } - - HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); - HInstruction* left = index; - int32_t right = 0; - if (left == induction_variable_ || - (ValueBound::IsAddOrSubAConstant(index, &left, &right) && - left == induction_variable_)) { - // For patterns like array[i] or array[i + 2]. - if (right < offset_low_) { - offset_low_ = right; - } - if (right > offset_high_) { - offset_high_ = right; - } - } else { - // Access not in induction_variable/(induction_variable_ + constant) - // format. Skip. - continue; - } - // Record this array. - found_array_length_ = array_length; - } - } - } - - private: - // The instruction that corresponds to a MonotonicValueRange. - HInstruction* induction_variable_; - - // The array length of the array that's accessed inside the loop body. - HArrayLength* found_array_length_; - - // The lowest and highest constant offsets relative to induction variable - // instruction_ in all array accesses. - // If array access are: array[i-1], array[i], array[i+1], - // offset_low_ is -1 and offset_high is 1. - int32_t offset_low_; - int32_t offset_high_; - - DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder); -}; - /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: @@ -500,18 +368,13 @@ class MonotonicValueRange : public ValueRange { : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), induction_variable_(induction_variable), initial_(initial), - end_(nullptr), - inclusive_(false), increment_(increment), bound_(bound) {} virtual ~MonotonicValueRange() {} - HInstruction* GetInductionVariable() const { return induction_variable_; } int32_t GetIncrement() const { return increment_; } ValueBound GetBound() const { return bound_; } - void SetEnd(HInstruction* end) { end_ = end; } - void SetInclusive(bool inclusive) { inclusive_ = inclusive; } HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); @@ -519,23 +382,6 @@ class MonotonicValueRange : public ValueRange { MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } - HBasicBlock* GetLoopHeaderSuccesorInLoop() { - HBasicBlock* header = GetLoopHeader(); - HInstruction* instruction = header->GetLastInstruction(); - DCHECK(instruction->IsIf()); - HIf* h_if = instruction->AsIf(); - HLoopInformation* loop_info = header->GetLoopInformation(); - bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); - bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); - - // Just in case it's some strange loop structure. - if (true_successor_in_loop && false_successor_in_loop) { - return nullptr; - } - DCHECK(true_successor_in_loop || false_successor_in_loop); - return false_successor_in_loop ? 
h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); - } - // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -627,467 +473,9 @@ class MonotonicValueRange : public ValueRange { } } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - // For example, this loop: - // - // for (int i = start; i < end; i++) { - // array[i - 1] = array[i] + array[i + 1]; - // } - // - // will be transformed to: - // - // int array_length_in_loop_body_if_needed; - // if (start >= end) { - // array_length_in_loop_body_if_needed = 0; - // } else { - // if (start < 1) deoptimize(); - // if (array == null) deoptimize(); - // array_length = array.length; - // if (end > array_length - 1) deoptimize; - // array_length_in_loop_body_if_needed = array_length; - // } - // for (int i = start; i < end; i++) { - // // No more null check and bounds check. - // // array.length value is replaced with array_length_in_loop_body_if_needed - // // in the loop body. - // array[i - 1] = array[i] + array[i + 1]; - // } - // - // We basically first go through the loop body and find those array accesses whose - // index is at a constant offset from the induction variable ('i' in the above example), - // and update offset_low and offset_high along the way. We then add the following - // deoptimizations in the loop pre-header (suppose end is not inclusive). - // if (start < -offset_low) deoptimize(); - // if (end >= array.length - offset_high) deoptimize(); - // It might be necessary to first hoist array.length (and the null check on it) out of - // the loop with another deoptimization. - // - // In order not to trigger deoptimization unnecessarily, we want to make a strong - // guarantee that no deoptimization is triggered if the loop body itself doesn't - // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop - // body must throw AIOOBE). - // This is achieved by the following: - // 1) We only process loops that iterate through the full monotonic range from - // initial_ to end_. We do the following checks to make sure that's the case: - // a) The loop doesn't have early exit (via break, return, etc.) - // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. - // 2) We only collect array accesses of blocks in the loop body that dominate - // all loop back edges, these array accesses are guaranteed to happen - // at each loop iteration. - // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses - // when the induction variable is at initial_ and end_ must be in a legal range. - // Since the added deoptimizations are basically checking the induction variable - // at initial_ and end_ values, no deoptimization will be triggered either. - // - // A special case is the loop body isn't entered at all. In that case, we may still - // add deoptimization due to the analysis described above. In order not to trigger - // deoptimization, we do a test between initial_ and end_ first and skip over - // the added deoptimization. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // In order not to trigger deoptimization unnecessarily, we want to - // make sure the loop iterates through the full range from initial_ to - // end_ so that boundaries are covered by the loop. An increment of 2, - // for example, may skip end_. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. 
- return this; - } - - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - if (!initial_->GetBlock()->Dominates(pre_header) || - !end_->GetBlock()->Dominates(pre_header)) { - // Can't add a check in loop pre-header if the value isn't available there. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - - // Returns true if adding a (constant >= value) check for deoptimization - // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { - *is_proven = false; - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - DCHECK(value->GetBlock()->Dominates(pre_header)); - - // See if we can prove the relationship first. - if (value->IsIntConstant()) { - if (value->AsIntConstant()->GetValue() >= constant) { - // Already true. - *is_proven = true; - return true; - } else { - // May throw exception. Don't add deoptimization. - // Keep bounds checks in the loops. - return false; - } - } - // Can benefit from deoptimization. - return true; - } - - // Try to filter out cases that the loop entry test will never be true. - bool LoopEntryTestUseful() { - if (initial_->IsIntConstant() && end_->IsIntConstant()) { - int32_t initial_val = initial_->AsIntConstant()->GetValue(); - int32_t end_val = end_->AsIntConstant()->GetValue(); - if (increment_ == 1) { - if (inclusive_) { - return initial_val > end_val; - } else { - return initial_val >= end_val; - } - } else { - DCHECK_EQ(increment_, -1); - if (inclusive_) { - return initial_val < end_val; - } else { - return initial_val <= end_val; - } - } - } - return true; - } - - // Returns the block for adding deoptimization. - HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - // Deoptimization is only added when both initial_ and end_ are defined - // before the loop. - DCHECK(initial_->GetBlock()->Dominates(pre_header)); - DCHECK(end_->GetBlock()->Dominates(pre_header)); - - // If it can be proven the loop body is definitely entered (unless exception - // is thrown in the loop header for which triggering deoptimization is fine), - // there is no need for tranforming the loop. In that case, deoptimization - // will just be added in the loop pre-header. 
- if (!LoopEntryTestUseful()) { - return pre_header; - } - - HGraph* graph = header->GetGraph(); - graph->TransformLoopHeaderForBCE(header); - HBasicBlock* new_pre_header = header->GetDominator(); - DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); - HBasicBlock* if_block = new_pre_header->GetDominator(); - HBasicBlock* dummy_block = if_block->GetSuccessors()[0]; // True successor. - HBasicBlock* deopt_block = if_block->GetSuccessors()[1]; // False successor. - - dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); - deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); - new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); - return deopt_block; - } - - // Adds a test between initial_ and end_ to see if the loop body is entered. - // If the loop body isn't entered at all, it jumps to the loop pre-header (after - // transformation) to avoid any deoptimization. - void AddLoopBodyEntryTest() { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - HBasicBlock* if_block = pre_header->GetDominator(); - HGraph* graph = header->GetGraph(); - - HCondition* cond; - if (increment_ == 1) { - if (inclusive_) { - cond = new (graph->GetArena()) HGreaterThan(initial_, end_); - } else { - cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); - } - } else { - DCHECK_EQ(increment_, -1); - if (inclusive_) { - cond = new (graph->GetArena()) HLessThan(initial_, end_); - } else { - cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); - } - } - HIf* h_if = new (graph->GetArena()) HIf(cond); - if_block->AddInstruction(cond); - if_block->AddInstruction(h_if); - } - - // Adds a check that (value >= constant), and HDeoptimize otherwise. - void AddDeoptimizationConstant(HInstruction* value, - int32_t constant, - HBasicBlock* deopt_block, - bool loop_entry_test_block_added) { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); - if (loop_entry_test_block_added) { - DCHECK(deopt_block->GetSuccessors()[0] == pre_header); - } else { - DCHECK(deopt_block == pre_header); - } - HGraph* graph = header->GetGraph(); - HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); - if (loop_entry_test_block_added) { - DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]); - } - - HIntConstant* const_instr = graph->GetIntConstant(constant); - HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), header); - } - - // Returns true if adding a (value <= array_length + offset) check for deoptimization - // is allowed and will benefit compiled code. 
- bool CanAddDeoptimizationArrayLength(HInstruction* value, - HArrayLength* array_length, - int32_t offset, - bool* is_proven) { - *is_proven = false; - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - DCHECK(value->GetBlock()->Dominates(pre_header)); - - if (array_length->GetBlock() == header) { - // array_length_in_loop_body_if_needed only has correct value when the loop - // body is entered. We bail out in this case. Usually array_length defined - // in the loop header is already hoisted by licm. - return false; - } else { - // array_length is defined either before the loop header already, or in - // the loop body since it's used in the loop body. If it's defined in the loop body, - // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, - // all the uses of array_length must be dominated by its definition in the loop - // body. array_length_in_loop_body_if_needed is guaranteed to be the same as - // array_length once the loop body is entered so all the uses of the phi will - // use the correct value. - } - - if (offset > 0) { - // There might be overflow issue. - // TODO: handle this, possibly with some distance relationship between - // offset_low and offset_high, or using another deoptimization to make - // sure (array_length + offset) doesn't overflow. - return false; - } - - // See if we can prove the relationship first. - if (value == array_length) { - if (offset >= 0) { - // Already true. - *is_proven = true; - return true; - } else { - // May throw exception. Don't add deoptimization. - // Keep bounds checks in the loops. - return false; - } - } - // Can benefit from deoptimization. - return true; - } - - // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. - void AddDeoptimizationArrayLength(HInstruction* value, - HArrayLength* array_length, - int32_t offset, - HBasicBlock* deopt_block, - bool loop_entry_test_block_added) { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); - if (loop_entry_test_block_added) { - DCHECK(deopt_block->GetSuccessors()[0] == pre_header); - } else { - DCHECK(deopt_block == pre_header); - } - HGraph* graph = header->GetGraph(); - HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); - - // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(deopt_block)) { - // array_length must be defined in the loop body. - DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); - DCHECK(array_length->GetBlock() != header); - - HInstruction* array = array_length->InputAt(0); - HNullCheck* null_check = array->AsNullCheck(); - if (null_check != nullptr) { - array = null_check->InputAt(0); - } - // We've already made sure the array is defined before the loop when collecting - // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(deopt_block)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { - // Hoist null check out of loop with a deoptimization. - HNullConstant* null_constant = graph->GetNullConstant(); - HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); - // TODO: for one dex_pc, share the same deoptimization slow path. 
- HDeoptimize* null_check_deoptimize = new (graph->GetArena()) - HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore( - null_check_cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore( - null_check_deoptimize, deopt_block->GetLastInstruction()); - // Eliminate null check in the loop. - null_check->ReplaceWith(array); - null_check->GetBlock()->RemoveInstruction(null_check); - null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), header); - } - - HArrayLength* new_array_length - = new (graph->GetArena()) HArrayLength(array, array->GetDexPc()); - deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); - - if (loop_entry_test_block_added) { - // Replace array_length defined inside the loop body with a phi - // array_length_in_loop_body_if_needed. This is a synthetic phi so there is - // no vreg number for it. - HPhi* phi = new (graph->GetArena()) HPhi( - graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); - // Set to 0 if the loop body isn't entered. - phi->SetRawInputAt(0, graph->GetIntConstant(0)); - // Set to array.length if the loop body is entered. - phi->SetRawInputAt(1, new_array_length); - pre_header->AddPhi(phi); - array_length->ReplaceWith(phi); - // Make sure phi is only used after the loop body is entered. - if (kIsDebugBuild) { - for (HUseIterator<HInstruction*> it(phi->GetUses()); - !it.Done(); - it.Advance()) { - HInstruction* user = it.Current()->GetUser(); - DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); - } - } - } else { - array_length->ReplaceWith(new_array_length); - } - - array_length->GetBlock()->RemoveInstruction(array_length); - // Use new_array_length for deopt. - array_length = new_array_length; - } - - HInstruction* added = array_length; - if (offset != 0) { - HIntConstant* offset_instr = graph->GetIntConstant(offset); - added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); - } - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); - HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); - deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); - } - - // Adds deoptimizations in loop pre-header with the collected array access - // data so that value ranges can be established in loop body. - // Returns true if deoptimizations are successfully added, or if it's proven - // it's not necessary. - bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) { - int32_t offset_low = finder.GetOffsetLow(); - int32_t offset_high = finder.GetOffsetHigh(); - HArrayLength* array_length = finder.GetFoundArrayLength(); - - HBasicBlock* pre_header = - induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader(); - if (!initial_->GetBlock()->Dominates(pre_header) || - !end_->GetBlock()->Dominates(pre_header)) { - // Can't move initial_ or end_ into pre_header for comparisons. 
- return false; - } - - HBasicBlock* deopt_block; - bool loop_entry_test_block_added = false; - bool is_constant_proven, is_length_proven; - - HInstruction* const_comparing_instruction; - int32_t const_compared_to; - HInstruction* array_length_comparing_instruction; - int32_t array_length_offset; - if (increment_ == 1) { - // Increasing from initial_ to end_. - const_comparing_instruction = initial_; - const_compared_to = -offset_low; - array_length_comparing_instruction = end_; - array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; - } else { - const_comparing_instruction = end_; - const_compared_to = inclusive_ ? -offset_low : -offset_low - 1; - array_length_comparing_instruction = initial_; - array_length_offset = -offset_high - 1; - } - - if (CanAddDeoptimizationConstant(const_comparing_instruction, - const_compared_to, - &is_constant_proven) && - CanAddDeoptimizationArrayLength(array_length_comparing_instruction, - array_length, - array_length_offset, - &is_length_proven)) { - if (!is_constant_proven || !is_length_proven) { - deopt_block = TransformLoopForDeoptimizationIfNeeded(); - loop_entry_test_block_added = (deopt_block != pre_header); - if (loop_entry_test_block_added) { - // Loop body may be entered. - AddLoopBodyEntryTest(); - } - } - if (!is_constant_proven) { - AddDeoptimizationConstant(const_comparing_instruction, - const_compared_to, - deopt_block, - loop_entry_test_block_added); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(array_length_comparing_instruction, - array_length, - array_length_offset, - deopt_block, - loop_entry_test_block_added); - } - return true; - } - return false; - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. - HInstruction* end_; // End value. - bool inclusive_; // Whether end value is inclusive. const int32_t increment_; // Increment for each loop iteration. const ValueBound bound_; // Additional value bound info for initial_. @@ -1111,7 +499,9 @@ class BCEVisitor : public HGraphVisitor { return block->GetBlockId() >= initial_block_size_; } - BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis) + BCEVisitor(HGraph* graph, + const SideEffectsAnalysis& side_effects, + HInductionVarAnalysis* induction_analysis) : HGraphVisitor(graph), maps_(graph->GetBlocks().size(), ArenaSafeMap<int, ValueRange*>( @@ -1121,8 +511,17 @@ class BCEVisitor : public HGraphVisitor { first_constant_index_bounds_check_map_( std::less<int>(), graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + early_exit_loop_( + std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + taken_test_loop_( + std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), need_to_revisit_block_(false), + has_deoptimization_on_constant_subscripts_(false), initial_block_size_(graph->GetBlocks().size()), + side_effects_(side_effects), induction_range_(induction_analysis) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -1138,6 +537,17 @@ class BCEVisitor : public HGraphVisitor { } } + void Finish() { + // Preserve SSA structure which may have been broken by adding one or more + // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()). + InsertPhiNodes(); + + // Clear the loop data structures. 
+ early_exit_loop_.clear(); + taken_test_loop_.clear(); + finite_loop_.clear(); + } + private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { @@ -1166,23 +576,6 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Return the range resulting from induction variable analysis of "instruction" when the value - // is used from "context", for example, an index used from a bounds-check inside a loop body. - ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) { - InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction); - InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction); - if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && - v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { - DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); - DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); - ValueBound low = ValueBound(v1.instruction, v1.b_constant); - ValueBound up = ValueBound(v2.instruction, v2.b_constant); - return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up); - } - // Didn't find anything useful. - return nullptr; - } - // Narrow the value range of `instruction` at the end of `basic_block` with `range`, // and push the narrowed value range to `successor`. void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, @@ -1328,17 +721,6 @@ class BCEVisitor : public HGraphVisitor { bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { - if (left_monotonic_range != nullptr) { - // Update the info for monotonic value range. - if (left_monotonic_range->GetInductionVariable() == left && - left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHeader() && - instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { - left_monotonic_range->SetEnd(right); - left_monotonic_range->SetInclusive(cond == kCondLT); - } - } - if (!upper.Equals(ValueBound::Max())) { int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); @@ -1362,17 +744,6 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondGT || cond == kCondGE) { - if (left_monotonic_range != nullptr) { - // Update the info for monotonic value range. - if (left_monotonic_range->GetInductionVariable() == left && - left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHeader() && - instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { - left_monotonic_range->SetEnd(right); - left_monotonic_range->SetInclusive(cond == kCondGT); - } - } - // array.length as a lower bound isn't considered useful. if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { int32_t compensation = (cond == kCondGT) ? 
1 : 0; // lower bound is inclusive @@ -1398,38 +769,34 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) { + void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength() || array_length->IsPhi()); - - if (array_length->IsPhi()) { - // Input 1 of the phi contains the real array.length once the loop body is - // entered. That value will be used for bound analysis. The graph is still - // strictly in SSA form. - array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); - } + bool try_dynamic_bce = true; if (!index->IsIntConstant()) { + // Non-constant subscript. ValueBound lower = ValueBound(nullptr, 0); // constant 0 ValueBound upper = ValueBound(array_length, -1); // array_length - 1 ValueRange array_range(GetGraph()->GetArena(), lower, upper); - // Try range obtained by local analysis. + // Try range obtained by dominator-based analysis. ValueRange* index_range = LookupValueRange(index, block); if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); return; } // Try range obtained by induction variable analysis. - index_range = LookupInductionRange(bounds_check, index); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceBoundsCheck(bounds_check, index); + // Disables dynamic bce if OOB is certain. + if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) { + ReplaceInstruction(bounds_check, index); return; } } else { + // Constant subscript. int32_t constant = index->AsIntConstant()->GetValue(); if (constant < 0) { // Will always throw exception. @@ -1437,7 +804,7 @@ class BCEVisitor : public HGraphVisitor { } if (array_length->IsIntConstant()) { if (constant < array_length->AsIntConstant()->GetValue()) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); } return; } @@ -1448,7 +815,7 @@ class BCEVisitor : public HGraphVisitor { ValueBound lower = existing_range->GetLower(); DCHECK(lower.IsConstant()); if (constant < lower.GetConstant()) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); return; } else { // Existing range isn't strong enough to eliminate the bounds check. @@ -1483,11 +850,11 @@ class BCEVisitor : public HGraphVisitor { ValueRange(GetGraph()->GetArena(), lower, upper); GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } - } - void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) { - bounds_check->ReplaceWith(index); - bounds_check->GetBlock()->RemoveInstruction(bounds_check); + // If static analysis fails, and OOB is not certain, try dynamic elimination. 
+ if (try_dynamic_bce) { + TryDynamicBCE(bounds_check); + } } static bool HasSameInputAtBackEdges(HPhi* phi) { @@ -1506,7 +873,7 @@ class BCEVisitor : public HGraphVisitor { return true; } - void VisitPhi(HPhi* phi) { + void VisitPhi(HPhi* phi) OVERRIDE { if (phi->IsLoopHeaderPhi() && (phi->GetType() == Primitive::kPrimInt) && HasSameInputAtBackEdges(phi)) { @@ -1553,7 +920,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitIf(HIf* instruction) { + void VisitIf(HIf* instruction) OVERRIDE { if (instruction->InputAt(0)->IsCondition()) { HCondition* cond = instruction->InputAt(0)->AsCondition(); IfCondition cmp = cond->GetCondition(); @@ -1562,42 +929,11 @@ class BCEVisitor : public HGraphVisitor { HInstruction* left = cond->GetLeft(); HInstruction* right = cond->GetRight(); HandleIf(instruction, left, right, cmp); - - HBasicBlock* block = instruction->GetBlock(); - ValueRange* left_range = LookupValueRange(left, block); - if (left_range == nullptr) { - return; - } - - if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { - // The comparison is for an induction variable in the loop header. - DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor = - left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); - if (loop_body_successor == nullptr) { - // In case it's some strange loop structure. - return; - } - ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if ((new_left_range == left_range) || - // Range narrowed with deoptimization is usually more useful than - // a constant range. - new_left_range->IsConstantValueRange()) { - // We are not successful in narrowing the monotonic value range to - // a regular value range. Try using deoptimization. - new_left_range = left_range->AsMonotonicValueRange()-> - NarrowWithDeoptimization(); - if (new_left_range != left_range) { - GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); - } - } - } } } } - void VisitAdd(HAdd* add) { + void VisitAdd(HAdd* add) OVERRIDE { HInstruction* right = add->GetRight(); if (right->IsIntConstant()) { ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock()); @@ -1611,7 +947,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitSub(HSub* sub) { + void VisitSub(HSub* sub) OVERRIDE { HInstruction* left = sub->GetLeft(); HInstruction* right = sub->GetRight(); if (right->IsIntConstant()) { @@ -1713,19 +1049,19 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDiv(HDiv* div) { + void VisitDiv(HDiv* div) OVERRIDE { FindAndHandlePartialArrayLength(div); } - void VisitShr(HShr* shr) { + void VisitShr(HShr* shr) OVERRIDE { FindAndHandlePartialArrayLength(shr); } - void VisitUShr(HUShr* ushr) { + void VisitUShr(HUShr* ushr) OVERRIDE { FindAndHandlePartialArrayLength(ushr); } - void VisitAnd(HAnd* instruction) { + void VisitAnd(HAnd* instruction) OVERRIDE { if (instruction->GetRight()->IsIntConstant()) { int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue(); if (constant > 0) { @@ -1740,7 +1076,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitNewArray(HNewArray* new_array) { + void VisitNewArray(HNewArray* new_array) OVERRIDE { HInstruction* len = new_array->InputAt(0); if (!len->IsIntConstant()) { HInstruction *left; @@ -1764,9 +1100,12 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDeoptimize(HDeoptimize* deoptimize) { - // Right now it's only HLessThanOrEqual. 
- DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) { + return; + } + // If this instruction was added by AddCompareWithDeoptimization(), narrow + // the range accordingly in subsequent basic blocks. HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); HInstruction* instruction = less_than_or_equal->InputAt(0); if (instruction->IsArrayLength()) { @@ -1780,6 +1119,35 @@ class BCEVisitor : public HGraphVisitor { } } + /** + * After null/bounds checks are eliminated, some invariant array references + * may be exposed underneath which can be hoisted out of the loop to the + * preheader or, in combination with dynamic bce, the deoptimization block. + * + * for (int i = 0; i < n; i++) { + * <-------+ + * for (int j = 0; j < n; j++) | + * a[i][j] = 0; --a[i]--+ + * } + * + * Note: this optimization is no longer applied after deoptimization on array references + * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in + * those cases it would be unsafe to hoist array references across their deoptimization + * instruction inside a loop. + */ + void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) { + HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation(); + if (loop->IsLoopInvariant(array_get->InputAt(0), false) && + loop->IsLoopInvariant(array_get->InputAt(1), false)) { + SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); + if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { + HoistToPreheaderOrDeoptBlock(loop, array_get); + } + } + } + } + void AddCompareWithDeoptimization(HInstruction* array_length, HIntConstant* const_instr, HBasicBlock* block) { @@ -1801,6 +1169,9 @@ class BCEVisitor : public HGraphVisitor { block->InsertInstructionBefore(cond, bounds_check); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + // Flag that this kind of deoptimization on array references with constant + // subscripts has occurred to prevent further hoisting of these references. + has_deoptimization_on_constant_subscripts_ = true; } void AddComparesWithDeoptimization(HBasicBlock* block) { @@ -1844,21 +1215,425 @@ class BCEVisitor : public HGraphVisitor { } } + /** + * Returns true if static range analysis based on induction variables can determine the bounds + * check on the given array range is always satisfied with the computed index range. The output + * parameter try_dynamic_bce is set to false if OOB is certain. + */ + bool InductionRangeFitsIn(ValueRange* array_range, + HInstruction* context, + HInstruction* index, + bool* try_dynamic_bce) { + InductionVarRange::Value v1; + InductionVarRange::Value v2; + bool needs_finite_test = false; + induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test); + if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && + v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { + DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); + DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); + ValueRange index_range(GetGraph()->GetArena(), + ValueBound(v1.instruction, v1.b_constant), + ValueBound(v2.instruction, v2.b_constant)); + // If analysis reveals a certain OOB, disable dynamic BCE. 
+ *try_dynamic_bce = !index_range.GetLower().LessThan(array_range->GetLower()) && + !index_range.GetUpper().GreaterThan(array_range->GetUpper()); + // Use analysis for static bce only if loop is finite. + return !needs_finite_test && index_range.FitsIn(array_range); + } + return false; + } + + /** + * When the compiler fails to remove a bounds check statically, we try to remove the bounds + * check dynamically by adding runtime tests that trigger a deoptimization in case bounds + * will go out of range (we want to be rather certain of that given the slowdown of + * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding + * bounds checks and related null checks removed. + */ + void TryDynamicBCE(HBoundsCheck* instruction) { + HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation(); + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + // If dynamic bounds check elimination seems profitable and is possible, then proceed. + bool needs_finite_test = false; + bool needs_taken_test = false; + if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) && + induction_range_.CanGenerateCode( + instruction, index, &needs_finite_test, &needs_taken_test) && + CanHandleInfiniteLoop(loop, index, needs_finite_test) && + CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen) + HInstruction* lower = nullptr; + HInstruction* upper = nullptr; + // Generate the following unsigned comparisons + // if (lower > upper) deoptimize; + // if (upper >= length) deoptimize; + // or, for a non-induction index, just the unsigned comparison on its 'upper' value + // if (upper >= length) deoptimize; + // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit + // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests + // correctly guard against any possible OOB (including arithmetic wrap-around cases). + HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); + if (lower != nullptr) { + InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); + } + InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length)); + ReplaceInstruction(instruction, index); + } + } + + /** + * Returns true if heuristics indicate that dynamic bce may be profitable. + */ + bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) { + if (loop != nullptr) { + // A try boundary preheader is hard to handle. + // TODO: remove this restriction + if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) { + return false; + } + // Does loop have early-exits? If so, the full range may not be covered by the loop + // at runtime and testing the range may apply deoptimization unnecessarily. + if (IsEarlyExitLoop(loop)) { + return false; + } + // Does the current basic block dominate all back edges? If not, + // don't apply dynamic bce to something that may not be executed. + for (HBasicBlock* back_edge : loop->GetBackEdges()) { + if (!block->Dominates(back_edge)) { + return false; + } + } + // Success! + return true; + } + return false; + } + + /** + * Returns true if the loop has early exits, which implies it may not cover + * the full range computed by range analysis based on induction variables. 
+ */ + bool IsEarlyExitLoop(HLoopInformation* loop) { + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + // If loop has been analyzed earlier for early-exit, don't repeat the analysis. + auto it = early_exit_loop_.find(loop_id); + if (it != early_exit_loop_.end()) { + return it->second; + } + // First time early-exit analysis for this loop. Since analysis requires scanning + // the full loop-body, results of the analysis is stored for subsequent queries. + HBlocksInLoopReversePostOrderIterator it_loop(*loop); + for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) { + for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) { + if (!loop->Contains(*successor)) { + early_exit_loop_.Put(loop_id, true); + return true; + } + } + } + early_exit_loop_.Put(loop_id, false); + return false; + } + + /** + * Returns true if the array length is already loop invariant, or can be made so + * by handling the null check under the hood of the array length operation. + */ + bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) { + if (loop->IsLoopInvariant(length, false)) { + return true; + } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { + if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { + HoistToPreheaderOrDeoptBlock(loop, length); + return true; + } + } + return false; + } + + /** + * Returns true if the null check is already loop invariant, or can be made so + * by generating a deoptimization test. + */ + bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) { + if (loop->IsLoopInvariant(check, false)) { + return true; + } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) { + HInstruction* array = check->InputAt(0); + if (loop->IsLoopInvariant(array, false)) { + // Generate: if (array == null) deoptimize; + HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HInstruction* cond = + new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); + InsertDeopt(loop, block, cond); + ReplaceInstruction(check, array); + return true; + } + } + return false; + } + + /** + * Returns true if compiler can apply dynamic bce to loops that may be infinite + * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate + * the range analysis evaluation code by "overshooting" the computed range. + * Since deoptimization would be a bad choice, and there is no other version + * of the loop to use, dynamic bce in such cases is only allowed if other tests + * ensure the loop is finite. + */ + bool CanHandleInfiniteLoop( + HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) { + if (needs_infinite_test) { + // If we already forced the loop to be finite, allow directly. + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + if (finite_loop_.find(loop_id) != finite_loop_.end()) { + return true; + } + // Otherwise, allow dynamic bce if the index (which is necessarily an induction at + // this point) is the direct loop index (viz. a[i]), since then the runtime tests + // ensure upper bound cannot cause an infinite loop. 
+ HInstruction* control = loop->GetHeader()->GetLastInstruction(); + if (control->IsIf()) { + HInstruction* if_expr = control->AsIf()->InputAt(0); + if (if_expr->IsCondition()) { + HCondition* condition = if_expr->AsCondition(); + if (index == condition->InputAt(0) || + index == condition->InputAt(1)) { + finite_loop_.insert(loop_id); + return true; + } + } + } + return false; + } + return true; + } + + /** Inserts a deoptimization test. */ + void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { + HInstruction* suspend = loop->GetSuspendCheck(); + block->InsertInstructionBefore(condition, block->GetLastInstruction()); + HDeoptimize* deoptimize = + new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc()); + block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); + if (suspend->HasEnvironment()) { + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), loop->GetHeader()); + } + } + + /** Hoists instruction out of the loop to preheader or deoptimization block. */ + void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + // Use preheader unless there is an earlier generated deoptimization block since + // hoisted expressions may depend on and/or used by the deoptimization tests. + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + HBasicBlock* preheader = loop->GetPreHeader(); + HBasicBlock* block = preheader; + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + block = it->second; + } + // Hoist the instruction. + DCHECK(!instruction->HasEnvironment()); + instruction->MoveBefore(block->GetLastInstruction()); + } + + /** + * Adds a new taken-test structure to a loop if needed (and not already done). + * The taken-test protects range analysis evaluation code to avoid any + * deoptimization caused by incorrect trip-count evaluation in non-taken loops. + * + * Returns block in which deoptimizations/invariants can be put. + * + * old_preheader + * | + * if_block <- taken-test protects deoptimization block + * / \ + * true_block false_block <- deoptimizations/invariants are placed in true_block + * \ / + * new_preheader <- may require phi nodes to preserve SSA structure + * | + * header + * + * For example, this loop: + * + * for (int i = lower; i < upper; i++) { + * array[i] = 0; + * } + * + * will be transformed to: + * + * if (lower < upper) { + * if (array == null) deoptimize; + * array_length = array.length; + * if (lower > upper) deoptimize; // unsigned + * if (upper >= array_length) deoptimize; // unsigned + * } else { + * array_length = 0; + * } + * for (int i = lower; i < upper; i++) { + * // Loop without null check and bounds check, and any array.length replaced with array_length. + * array[i] = 0; + * } + */ + HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader), or already done (can reuse)? + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + if (!needs_taken_test) { + return loop->GetPreHeader(); + } else { + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + return it->second; + } + } + + // Generate top test structure. + HBasicBlock* header = loop->GetHeader(); + GetGraph()->TransformLoopHeaderForBCE(header); + HBasicBlock* new_preheader = loop->GetPreHeader(); + HBasicBlock* if_block = new_preheader->GetDominator(); + HBasicBlock* true_block = if_block->GetSuccessors()[0]; // True successor. 
+ HBasicBlock* false_block = if_block->GetSuccessors()[1]; // False successor. + + // Goto instructions. + true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + + // Insert the taken-test to see if the loop body is entered. If the + // loop isn't entered at all, it jumps around the deoptimization block. + if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder + HInstruction* condition = nullptr; + induction_range_.GenerateTakenTest(header->GetLastInstruction(), + GetGraph(), + if_block, + &condition); + DCHECK(condition != nullptr); + if_block->RemoveInstruction(if_block->GetLastInstruction()); + if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); + + taken_test_loop_.Put(loop_id, true_block); + return true_block; + } + + /** + * Inserts phi nodes that preserve SSA structure in generated top test structures. + * All uses of instructions in the deoptimization block that reach the loop need + * a phi node in the new loop preheader to fix the dominance relation. + * + * Example: + * if_block + * / \ + * x_0 = .. false_block + * \ / + * x_1 = phi(x_0, null) <- synthetic phi + * | + * header + */ + void InsertPhiNodes() { + // Scan all new deoptimization blocks. + for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) { + HBasicBlock* true_block = it1->second; + HBasicBlock* new_preheader = true_block->GetSingleSuccessor(); + // Scan all instructions in a new deoptimization block. + for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + Primitive::Type type = instruction->GetType(); + HPhi* phi = nullptr; + // Scan all uses of an instruction and replace each later use with a phi node. + for (HUseIterator<HInstruction*> it2(instruction->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() != true_block) { + if (phi == nullptr) { + phi = NewPhi(new_preheader, instruction, type); + } + user->ReplaceInput(phi, it2.Current()->GetIndex()); + } + } + // Scan all environment uses of an instruction and replace each later use with a phi node. + for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses()); + !it2.Done(); + it2.Advance()) { + HEnvironment* user = it2.Current()->GetUser(); + if (user->GetHolder()->GetBlock() != true_block) { + if (phi == nullptr) { + phi = NewPhi(new_preheader, instruction, type); + } + user->RemoveAsUserOfInput(it2.Current()->GetIndex()); + user->SetRawEnvAt(it2.Current()->GetIndex(), phi); + phi->AddEnvUseAt(user, it2.Current()->GetIndex()); + } + } + } + } + } + + /** + * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation. + * These are synthetic phi nodes without a virtual register. 
+ */ + HPhi* NewPhi(HBasicBlock* new_preheader, + HInstruction* instruction, + Primitive::Type type) { + HGraph* graph = GetGraph(); + HInstruction* zero; + switch (type) { + case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break; + case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break; + case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break; + default: zero = graph->GetConstant(type, 0); break; + } + HPhi* phi = new (graph->GetArena()) + HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type)); + phi->SetRawInputAt(0, instruction); + phi->SetRawInputAt(1, zero); + new_preheader->AddPhi(phi); + return phi; + } + + /** Helper method to replace an instruction with another instruction. */ + static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + instruction->ReplaceWith(replacement); + instruction->GetBlock()->RemoveInstruction(instruction); + } + + // A set of maps, one per basic block, from instruction to range. ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_; // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in // a block that checks a constant index against that HArrayLength. ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + // Early-exit loop bookkeeping. + ArenaSafeMap<uint32_t, bool> early_exit_loop_; + + // Taken-test loop bookkeeping. + ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_; + + // Finite loop bookkeeping. + ArenaSet<uint32_t> finite_loop_; + // For the block, there is at least one HArrayLength instruction for which there // is more than one bounds check instruction with constant indexing. And it's // beneficial to add a compare instruction that has deoptimization fallback and // eliminate those bounds checks. bool need_to_revisit_block_; + // Flag that denotes whether deoptimization has occurred on array references + // with constant subscripts (see AddCompareWithDeoptimization()). + bool has_deoptimization_on_constant_subscripts_; + // Initial number of blocks. uint32_t initial_block_size_; + // Side effects. + const SideEffectsAnalysis& side_effects_; + // Range analysis based on induction variables. InductionVarRange induction_range_; @@ -1870,14 +1645,12 @@ void BoundsCheckElimination::Run() { return; } - BCEVisitor visitor(graph_, induction_analysis_); // Reverse post order guarantees a node's dominators are visited first. // We want to visit in the dominator-based order since if a value is known to // be bounded by a range at one instruction, it must be true that all uses of // that value dominated by that instruction fits in that range. Range of that // value can be narrowed further down in the dominator tree. - // - // TODO: only visit blocks that dominate some array accesses. + BCEVisitor visitor(graph_, side_effects_, induction_analysis_); HBasicBlock* last_visited_block = nullptr; for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); @@ -1894,6 +1667,9 @@ void BoundsCheckElimination::Run() { visitor.VisitBasicBlock(current); last_visited_block = current; } + + // Perform cleanup. 
+ visitor.Finish(); } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index cdff3ca0ba..b9df686ffd 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -21,12 +21,16 @@ namespace art { +class SideEffectsAnalysis; class HInductionVarAnalysis; class BoundsCheckElimination : public HOptimization { public: - BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis) + BoundsCheckElimination(HGraph* graph, + const SideEffectsAnalysis& side_effects, + HInductionVarAnalysis* induction_analysis) : HOptimization(graph, kBoundsCheckEliminiationPassName), + side_effects_(side_effects), induction_analysis_(induction_analysis) {} void Run() OVERRIDE; @@ -34,6 +38,7 @@ class BoundsCheckElimination : public HOptimization { static constexpr const char* kBoundsCheckEliminiationPassName = "BCE"; private: + const SideEffectsAnalysis& side_effects_; HInductionVarAnalysis* induction_analysis_; DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index c9afdf2147..dbeb1ccc22 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -54,7 +54,7 @@ class BoundsCheckEliminationTest : public testing::Test { HInductionVarAnalysis induction(graph_); induction.Run(); - BoundsCheckElimination(graph_, &induction).Run(); + BoundsCheckElimination(graph_, side_effects, &induction).Run(); } ArenaPool pool_; diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index ed193c7b61..8e75bdcdc9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -359,18 +359,10 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) // need a strategy for splitting exceptional edges. We split the block // after the move-exception (if present) and mark the first part not // throwing. The normal-flow edge between them will be split later. - HInstruction* first_insn = block->GetFirstInstruction(); - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after - // the StoreLocal and ClearException which must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); - throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); - } else { - // Catch block does not load the exception. Split at the beginning - // to create an empty catch block. - throwing_block = block->SplitBefore(first_insn); - } + throwing_block = block->SplitCatchBlockAfterMoveException(); + // Move-exception does not throw and the block has throwing instructions + // so it must have been possible to split it.
+ DCHECK(throwing_block != nullptr); } try_block_info.Put(throwing_block->GetBlockId(), @@ -743,6 +735,79 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) { } } +ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<2> hs(soa.Self()); + + ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); + + ArtMethod* resolved_method = class_linker->ResolveMethod( + *dex_compilation_unit_->GetDexFile(), + method_idx, + dex_compilation_unit_->GetDexCache(), + class_loader, + /* referrer */ nullptr, + invoke_type); + + if (UNLIKELY(resolved_method == nullptr)) { + // Clean up any exception left by type resolution. + soa.Self()->ClearException(); + return nullptr; + } + + // Check access. The class linker has a fast path for looking into the dex cache + // and does not check the access if it hits it. + if (compiling_class.Get() == nullptr) { + if (!resolved_method->IsPublic()) { + return nullptr; + } + } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(), + resolved_method, + dex_compilation_unit_->GetDexCache().Get(), + method_idx)) { + return nullptr; + } + + // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not. + // We need to look at the referrer's super class vtable. + if (invoke_type == kSuper) { + if (compiling_class.Get() == nullptr) { + // Invoking a super method requires knowing the actual super class. If we did not resolve + // the compiling method's declaring class (which only happens for ahead of time compilation), + // bail out. + DCHECK(Runtime::Current()->IsAotCompiler()); + return nullptr; + } + uint16_t vtable_index = resolved_method->GetMethodIndex(); + ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry( + vtable_index, class_linker->GetImagePointerSize()); + if (actual_method != resolved_method && + !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { + // TODO: The actual method could still be referenced in the current dex file, so we + // could try locating it. + // TODO: Remove the dex_file restriction. + return nullptr; + } + if (!actual_method->IsInvokable()) { + // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub + // could resolve the callee to the wrong method. + return nullptr; + } + resolved_method = actual_method; + } + + // Check for incompatible class changes. The class linker has a fast path for + // looking into the dex cache and does not check incompatible class changes if it hits it. 
+ if (resolved_method->CheckIncompatibleClassChange(invoke_type)) { + return nullptr; + } + + return resolved_method; +} + bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, @@ -750,22 +815,18 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_range, uint32_t* args, uint32_t register_index) { - InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); - InvokeType optimized_invoke_type = original_invoke_type; + InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); const char* descriptor = dex_file_->GetMethodShorty(method_idx); Primitive::Type return_type = Primitive::GetType(descriptor[0]); // Remove the return type from the 'proto'. size_t number_of_arguments = strlen(descriptor) - 1; - if (original_invoke_type != kStatic) { // instance call + if (invoke_type != kStatic) { // instance call // One extra argument for 'this'. number_of_arguments++; } MethodReference target_method(dex_file_, method_idx); - int32_t table_index = 0; - uintptr_t direct_code = 0; - uintptr_t direct_method = 0; // Special handling for string init. int32_t string_init_offset = 0; @@ -788,7 +849,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, method_idx, target_method, dispatch_info, - original_invoke_type, + invoke_type, kStatic /* optimized_invoke_type */, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); return HandleStringInit(invoke, @@ -799,23 +860,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, descriptor); } - // Handle unresolved methods. - if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, - dex_pc, - true /* update_stats */, - true /* enable_devirtualization */, - &optimized_invoke_type, - &target_method, - &table_index, - &direct_code, - &direct_method)) { + ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type); + + if (resolved_method == nullptr) { MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod); HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_, number_of_arguments, return_type, dex_pc, method_idx, - original_invoke_type); + invoke_type); return HandleInvoke(invoke, number_of_vreg_arguments, args, @@ -825,21 +879,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, nullptr /* clinit_check */); } - // Handle resolved methods (non string init). - - DCHECK(optimized_invoke_type != kSuper); - // Potential class initialization check, in the case of a static method call. HClinitCheck* clinit_check = nullptr; HInvoke* invoke = nullptr; - if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) { + if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) { // By default, consider that the called method implicitly requires // an initialization check of its declaring method. HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; - if (optimized_invoke_type == kStatic) { - clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement); + ScopedObjectAccess soa(Thread::Current()); + if (invoke_type == kStatic) { + clinit_check = ProcessClinitCheckForInvoke( + dex_pc, resolved_method, method_idx, &clinit_check_requirement); + } else if (invoke_type == kSuper) { + if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { + // Update the target method to the one resolved. 
Note that this may be a no-op if + // we resolved to the method referenced by the instruction. + method_idx = resolved_method->GetDexMethodIndex(); + target_method = MethodReference(dex_file_, method_idx); + } } HInvokeStaticOrDirect::DispatchInfo dispatch_info = { @@ -855,24 +914,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, method_idx, target_method, dispatch_info, - original_invoke_type, - optimized_invoke_type, + invoke_type, + invoke_type, clinit_check_requirement); - } else if (optimized_invoke_type == kVirtual) { + } else if (invoke_type == kVirtual) { + ScopedObjectAccess soa(Thread::Current()); // Needed for the method index invoke = new (arena_) HInvokeVirtual(arena_, number_of_arguments, return_type, dex_pc, method_idx, - table_index); + resolved_method->GetMethodIndex()); } else { - DCHECK_EQ(optimized_invoke_type, kInterface); + DCHECK_EQ(invoke_type, kInterface); + ScopedObjectAccess soa(Thread::Current()); // Needed for the method index invoke = new (arena_) HInvokeInterface(arena_, number_of_arguments, return_type, dex_pc, method_idx, - table_index); + resolved_method->GetDexMethodIndex()); } return HandleInvoke(invoke, @@ -884,26 +945,106 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check); } -HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( - uint32_t dex_pc, - uint32_t method_idx, - HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { +bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + + // Only the non-resolved entrypoint handles the finalizable class case. If we + // need access checks, then we haven't resolved the method and the class may + // again be finalizable. + QuickEntrypointEnum entrypoint = (finalizable || can_throw) + ? kQuickAllocObject + : kQuickAllocObjectInitialized; + ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<3> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); - ArtMethod* resolved_method = compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic); + Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + + if (outer_dex_cache.Get() != dex_cache.Get()) { + // We currently do not support inlining allocations across dex files. 
+ return false; + } + + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + type_index, + outer_dex_file, + IsOutermostCompilingClass(type_index), + dex_pc, + /*needs_access_check*/ can_throw, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index)); + + current_block_->AddInstruction(load_class); + HInstruction* cls = load_class; + if (!IsInitialized(resolved_class)) { + cls = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(cls); + } + + current_block_->AddInstruction(new (arena_) HNewInstance( + cls, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, + entrypoint)); + return true; +} + +static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) + SHARED_REQUIRES(Locks::mutator_lock_) { + return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); +} - DCHECK(resolved_method != nullptr); +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const { + if (cls.Get() == nullptr) { + return false; + } + + // `CanAssumeClassIsLoaded` will return true if we're JITting, or will + // check whether the class is in an image for the AOT compilation. + if (cls->IsInitialized() && + compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { + return true; + } + + if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + return true; + } + + // TODO: We should walk over the inlined methods, but we don't pass + // that information to the builder. + if (IsSubClass(GetCompilingClass(), cls.Get())) { + return true; + } + + return false; +} +HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( + uint32_t dex_pc, + ArtMethod* resolved_method, + uint32_t method_idx, + HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Thread* self = Thread::Current(); + StackHandleScope<4> hs(self); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + self, *dex_compilation_unit_->GetDexFile()))); Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( - outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + outer_compilation_unit_->GetClassLinker()->FindDexCache( + self, outer_dex_file))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass())); // The index at which the method's class is stored in the DexCache's type array. uint32_t storage_index = DexFile::kDexNoIndex; @@ -921,41 +1062,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( HClinitCheck* clinit_check = nullptr; - if (!outer_class->IsInterface() - && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the outer class is the declaring class or a subclass - // of the declaring class, no class initialization is needed - // before the static method call. - // Note that in case of inlining, we do not need to add clinit checks - // to calls that satisfy this subclass check with any inlined methods. This - // will be detected by the optimization passes. 
+ if (IsInitialized(resolved_method_class)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { - // If the method's class type index is available, check - // whether we should add an explicit class initialization - // check for its declaring class before the static method call. - - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = - resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - - if (is_initialized) { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; - } else { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = new (arena_) HLoadClass( - graph_->GetCurrentMethod(), - storage_index, - *dex_compilation_unit_->GetDexFile(), - is_outer_class, - dex_pc, - /*needs_access_check*/ false); - current_block_->AddInstruction(load_class); - clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); - current_block_->AddInstruction(clinit_check); - } + *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + storage_index, + outer_dex_file, + is_outer_class, + dex_pc, + /*needs_access_check*/ false, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index)); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); } return clinit_check; } @@ -1006,7 +1127,9 @@ bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke, return false; } - if (invoke->IsInvokeStaticOrDirect()) { + if (invoke->IsInvokeStaticOrDirect() && + HInvokeStaticOrDirect::NeedsCurrentMethodInput( + invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) { invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod()); (*argument_index)++; } @@ -1278,7 +1401,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -1324,26 +1447,26 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - // TODO: find out why this check is needed. 
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - + bool is_in_cache = + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false); + /*needs_access_check*/ false, + is_in_cache); current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_outer_class) { + + Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (!IsInitialized(klass)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); + uint16_t class_def_index = klass->GetDexClassDefIndex(); if (is_put) { // We need to keep the class alive before loading the value. Temporaries temps(graph_); @@ -1455,7 +1578,8 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, uint32_t* args, uint32_t register_index) { HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; HInstruction* object = new (arena_) HNewArray(length, @@ -1606,19 +1730,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> dex_cache(hs.NewHandle( - dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); + dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index)); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. @@ -1635,9 +1760,9 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, } } -bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { +bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const { return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index); + dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable); } void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table, @@ -2514,16 +2639,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) - ? 
kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance( - graph_->GetCurrentMethod(), - dex_pc, - type_index, - *dex_compilation_unit_->GetDexFile(), - entrypoint)); + if (!BuildNewInstance(type_index, dex_pc)) { + return false; + } UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } break; @@ -2532,7 +2650,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_ARRAY: { uint16_t type_index = instruction.VRegC_22c(); HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + bool finalizable; + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; current_block_->AddInstruction(new (arena_) HNewArray(length, @@ -2750,10 +2869,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access)); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index))); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 9eaa4b62c5..c3979f3dd1 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -138,7 +138,10 @@ class HGraphBuilder : public ValueObject { HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const; void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); - bool NeedsAccessCheck(uint32_t type_index) const; + + // Returns whether the current method needs access check for the type. + // Output parameter finalizable is set to whether the type is finalizable. + bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const; template<typename T> void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); @@ -302,8 +305,21 @@ class HGraphBuilder : public ValueObject { HClinitCheck* ProcessClinitCheckForInvoke( uint32_t dex_pc, + ArtMethod* method, uint32_t method_idx, - HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) + SHARED_REQUIRES(Locks::mutator_lock_); + + // Build a HNewInstance instruction. + bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); + + // Return whether the compiler can assume `cls` is initialized. + bool IsInitialized(Handle<mirror::Class> cls) const + SHARED_REQUIRES(Locks::mutator_lock_); + + // Try to resolve a method using the class linker. Return null if a method could + // not be resolved. 
+ ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type); ArenaAllocator* const arena_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index a1bb5e0838..0baa0e30dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -42,7 +42,7 @@ #include "compiled_method.h" #include "dex/verified_method.h" -#include "driver/dex_compilation_unit.h" +#include "driver/compiler_driver.h" #include "gc_map_builder.h" #include "graph_visualizer.h" #include "intrinsics.h" @@ -208,6 +208,7 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { size_t code_start = 0; for (SlowPathCode* slow_path : slow_paths_) { + current_slow_path_ = slow_path; if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } @@ -216,6 +217,7 @@ void CodeGenerator::GenerateSlowPaths() { disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); } } + current_slow_path_ = nullptr; } void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { @@ -308,7 +310,7 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order) { block_order_ = &block_order; @@ -322,14 +324,14 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, && IsLeafMethod() && !RequiresCurrentMethod()) { DCHECK_EQ(maximum_number_of_live_core_registers, 0u); - DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u); SetFrameSize(CallPushesPC() ? GetWordSize() : 0); } else { SetFrameSize(RoundUp( number_of_spill_slots * kVRegSize + number_of_out_slots * kVRegSize + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize() + FrameEntrySpillSize(), kStackAlignment)); } @@ -381,11 +383,11 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); switch (call->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: locations->AddTemp(visitor->GetMethodLocation()); - locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; default: locations->AddTemp(visitor->GetMethodLocation()); @@ -545,15 +547,19 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( } } +// TODO: Remove argument `code_generator_supports_read_barrier` when +// all code generators have read barrier support. void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location) { + Location runtime_return_location, + bool code_generator_supports_read_barrier) { ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena(); LocationSummary::CallKind call_kind = cls->NeedsAccessCheck() ? 
LocationSummary::kCall - : (cls->CanCallRuntime() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); + : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) || + cls->CanCallRuntime()) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind); if (cls->NeedsAccessCheck()) { locations->SetInAt(0, Location::NoLocation()); @@ -787,9 +793,10 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, } void CodeGenerator::BuildNativeGCMap( - ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const { + ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const { const std::vector<uint8_t>& gc_map_raw = - dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); + compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx()) + ->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); @@ -911,19 +918,22 @@ void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const { vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); } -void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) { - uint32_t size = stack_map_stream_.PrepareForFillIn(); - data->resize(size); - MemoryRegion region(data->data(), size); +size_t CodeGenerator::ComputeStackMapsSize() { + return stack_map_stream_.PrepareForFillIn(); +} + +void CodeGenerator::BuildStackMaps(MemoryRegion region) { stack_map_stream_.FillIn(region); } void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end) { - if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) { - src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), - static_cast<int32_t>(dex_pc)})); + if (compiler_options_.GetGenerateDebugInfo() && + dex_pc != kNoDexPc && + native_pc_begin != native_pc_end) { + src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), + static_cast<int32_t>(dex_pc)})); } } @@ -1314,21 +1324,38 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod // coherent with the runtime call generated, and that the GC side effect is // set when required. if (slow_path == nullptr) { - DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName(); + DCHECK(instruction->GetLocations()->WillCall()) + << "instruction->DebugName()=" << instruction->DebugName(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) - << instruction->DebugName() << instruction->GetSideEffects().ToString(); + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal()) - << instruction->DebugName() << slow_path->GetDescription(); + << "instruction->DebugName()=" << instruction->DebugName() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || // Control flow would not come back into the code if a fatal slow // path is taken, so we do not care if it triggers GC. slow_path->IsFatal() || // HDeoptimize is a special case: we know we are not coming back from // it into the code. 
- instruction->IsDeoptimize()) - << instruction->DebugName() << instruction->GetSideEffects().ToString() - << slow_path->GetDescription(); + instruction->IsDeoptimize() || + // When read barriers are enabled, some instructions use a + // slow path to emit a read barrier, which does not trigger + // GC, is not fatal, nor is emitted by HDeoptimize + // instructions. + (kEmitCompilerReadBarrier && + (instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArraySet() || + instruction->IsArrayGet() || + instruction->IsLoadClass() || + instruction->IsLoadString() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()))) + << "instruction->DebugName()=" << instruction->DebugName() + << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString() + << " slow_path->GetDescription()=" << slow_path->GetDescription(); } // Check the coherency of leaf information. @@ -1340,11 +1367,12 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod } void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { // If the register holds an object, update the stack mask. if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); @@ -1359,7 +1387,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_fpu_stack_offsets_[i] = stack_offset; @@ -1370,12 +1398,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo } void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { - RegisterSet* register_set = locations->GetLiveRegisters(); + RegisterSet* live_registers = locations->GetLiveRegisters(); size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (!codegen->IsCoreCalleeSaveRegister(i)) { - if (register_set->ContainsCoreRegister(i)) { + if (live_registers->ContainsCoreRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } } @@ -1383,8 +1413,9 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (!codegen->IsFloatingPointCalleeSaveRegister(i)) { - if (register_set->ContainsFloatingPointRegister(i)) { + if (live_registers->ContainsFloatingPointRegister(i)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += 
codegen->RestoreFloatingPointRegister(stack_offset, i); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 47b6f30450..114d97be94 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,6 +22,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "compiled_method.h" #include "driver/compiler_options.h" #include "globals.h" #include "graph_visualizer.h" @@ -51,13 +52,9 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); class Assembler; class CodeGenerator; -class DexCompilationUnit; +class CompilerDriver; class LinkerPatch; class ParallelMoveResolver; -class SrcMapElem; -template <class Alloc> -class SrcMap; -using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; class CodeAllocator { public: @@ -204,7 +201,7 @@ class CodeGenerator { virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, + size_t maximum_number_of_live_fpu_registers, size_t number_of_out_slots, const ArenaVector<HBasicBlock*>& block_order); int32_t GetStackSlot(HLocal* local) const; @@ -253,6 +250,15 @@ class CodeGenerator { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + size_t GetNumberOfCoreCalleeSaveRegisters() const { + return POPCOUNT(core_callee_save_mask_); + } + + size_t GetNumberOfCoreCallerSaveRegisters() const { + DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); + return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); + } + bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; } @@ -284,13 +290,12 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } - void BuildMappingTable(ArenaVector<uint8_t>* vector) const; void BuildVMapTable(ArenaVector<uint8_t>* vector) const; void BuildNativeGCMap( - ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; - void BuildStackMaps(ArenaVector<uint8_t>* vector); + ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const; + void BuildStackMaps(MemoryRegion region); + size_t ComputeStackMapsSize(); bool IsBaseline() const { return is_baseline_; @@ -420,7 +425,8 @@ class CodeGenerator { // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design. static void CreateLoadClassLocationSummary(HLoadClass* cls, Location runtime_type_index_location, - Location runtime_return_location); + Location runtime_return_location, + bool code_generator_supports_read_barrier = false); static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); @@ -446,6 +452,10 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + const ArenaVector<SrcMapElem>& GetSrcMappingTable() const { + return src_map_; + } + protected: // Method patch info used for recording locations of required linker patches and // target methods. 
The target method can be used for various purposes, whether for @@ -488,8 +498,9 @@ class CodeGenerator { stats_(stats), graph_(graph), compiler_options_(compiler_options), - src_map_(nullptr), + src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + current_slow_path_(nullptr), current_block_index_(0), is_leaf_(true), requires_current_method_(false) { @@ -557,6 +568,10 @@ class CodeGenerator { return raw_pointer_to_labels_array + block->GetBlockId(); } + SlowPathCode* GetCurrentSlowPath() { + return current_slow_path_; + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -602,9 +617,12 @@ class CodeGenerator { const CompilerOptions& compiler_options_; // Native to dex_pc map used for native debugging/profiling tools. - DefaultSrcMap* src_map_; + ArenaVector<SrcMapElem> src_map_; ArenaVector<SlowPathCode*> slow_paths_; + // The current slow path that we're generating code for. + SlowPathCode* current_slow_path_; + // The current block index in `block_order_` of the block // we are generating code for. size_t current_block_index_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3dc3b7fba0..ac6b5e823a 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace arm { static bool ExpectedPairLayout(Location location) { @@ -74,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -98,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -120,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); @@ -176,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode { Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -211,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode { ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. 
Location out = locations->Out(); @@ -257,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); @@ -286,15 +299,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -315,6 +319,8 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); @@ -322,6 +328,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -354,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -396,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -408,6 +417,221 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::arm::Thumb2Assembler::Lsl and + // art::arm::Thumb2Assembler::AddConstant below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
+ __ Lsl(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddConstant(index_reg, index_reg, offset_); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_); + } + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM); +}; + +// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode { + public: + ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM); +}; + #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> @@ -500,7 +724,9 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); } @@ -581,7 +807,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { @@ -820,7 +1046,7 @@ Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const { @@ -847,7 +1073,7 @@ Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type } case Primitive::kPrimVoid: - return Location(); + return Location::NoLocation(); } UNREACHABLE(); @@ -1240,26 +1466,19 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(true_label, final_condition); } -void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. 
If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. - if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: @@ -1278,103 +1497,125 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected compare type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ b(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } } void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ b(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ b(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ b(false_target); + } } - } else { - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - // Condition has been materialized, compare the output to 0 - DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); - __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), - true_target); + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // Condition has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + if (true_target == nullptr) { + __ CompareAndBranchIfZero(cond_val.AsRegister<Register>(), false_target); } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - Primitive::Type type = - cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - // Is this a long or FP comparison that has been folded into the HCondition? 
- if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), - true_target, false_target, always_true_target); - return; - } + __ CompareAndBranchIfNonZero(cond_val.AsRegister<Register>(), true_target); + } + } else { + // Condition has not been materialized. Use its inputs as the comparison and + // its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. + Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } - LocationSummary* locations = cond->GetLocations(); - DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0); - Register left = locations->InAt(0).AsRegister<Register>(); - Location right = locations->InAt(1); - if (right.IsRegister()) { - __ cmp(left, ShifterOperand(right.AsRegister<Register>())); - } else { - DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); + LocationSummary* locations = cond->GetLocations(); + DCHECK(locations->InAt(0).IsRegister()); + Register left = locations->InAt(0).AsRegister<Register>(); + Location right = locations->InAt(1); + if (right.IsRegister()) { + __ cmp(left, ShifterOperand(right.AsRegister<Register>())); + } else { + DCHECK(right.IsConstant()); + GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + } + if (true_target == nullptr) { + __ b(false_target, ARMCondition(condition->GetOppositeCondition())); + } else { + __ b(true_target, ARMCondition(condition->GetCondition())); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { __ b(false_target); } } void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 
+ nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM(deoptimize); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderARM::VisitCondition(HCondition* cond) { @@ -1683,10 +1924,18 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok codegen_->GetAssembler(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. + if (invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { @@ -1747,29 +1996,39 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register hidden_reg = locations->GetTemp(1).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. - __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); + // Set the hidden argument. This is safe to do this here, as R12 + // won't be modified thereafter, before the `blx` (call) instruction. 
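+ // (R12 is ARM's IP scratch register; it is used here as the hidden argument register
+ // carrying the interface method's dex method index, as the DCHECK just below asserts.)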
+ DCHECK_EQ(R12, hidden_reg); + __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); + // /* HeapReference<Class> */ temp = temp->klass_ __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); - uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value(); + uint32_t entry_point = + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -2173,6 +2432,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2181,6 +2441,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2226,6 +2487,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; case Primitive::kPrimDouble: @@ -2680,7 +2942,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { case Primitive::kPrimInt: { if (div->InputAt(1)->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); + locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); if (abs_imm <= 1) { @@ -2748,6 +3010,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } @@ -2762,6 +3025,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -2804,7 +3068,7 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { case Primitive::kPrimInt: { if (rem->InputAt(1)->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); + locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); 
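+ // As with HDiv above, the divisor is now pinned to a constant location (rather than
+ // RegisterOrConstant), presumably because the constant-divisor sequence below consumes
+ // the immediate directly and never needs it materialized in a register.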
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); if (abs_imm <= 1) { @@ -2890,22 +3154,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(R1, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -2975,17 +3243,29 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1))); - // Make the output overlap, as it will be used to hold the masked - // second input. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + // Make the output overlap, as it will be used to hold the masked + // second input. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + if (op->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + // For simplicity, use kOutputOverlap even though we only require that low registers + // don't clash with high registers which the register allocator currently guarantees. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } break; } default: @@ -3006,9 +3286,9 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { case Primitive::kPrimInt: { Register out_reg = out.AsRegister<Register>(); Register first_reg = first.AsRegister<Register>(); - // Arm doesn't mask the shift count so we need to do it ourselves. if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); + // Arm doesn't mask the shift count so we need to do it ourselves. 
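+ // A note on why: the Java language masks int shift counts to their low 5 bits (low 6
+ // bits for long shifts), whereas an ARM register-specified shift uses the low 8 bits of
+ // the register, so e.g. a count of 32 would clear the result instead of being a no-op.
+ // Masking with kMaxIntShiftValue restores the Java semantics; a minimal sketch of the
+ // intended behaviour:
+ //   int32_t java_shl(int32_t value, int32_t count) { return value << (count & 0x1f); }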
__ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { __ Lsl(out_reg, first_reg, out_reg); @@ -3036,57 +3316,115 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register o_h = out.AsRegisterPairHigh<Register>(); Register o_l = out.AsRegisterPairLow<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register high = first.AsRegisterPairHigh<Register>(); Register low = first.AsRegisterPairLow<Register>(); - Register second_reg = second.AsRegister<Register>(); - - if (op->IsShl()) { - __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue)); - // Shift the high part - __ Lsl(o_h, high, o_l); - // Shift the low part and `or` what overflew on the high part - __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord)); - __ Lsr(temp, low, temp); - __ orr(o_h, o_h, ShifterOperand(temp)); - // If the shift is > 32 bits, override the high part - __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord)); - __ it(PL); - __ Lsl(o_h, low, temp, PL); - // Shift the low part - __ Lsl(o_l, low, o_l); - } else if (op->IsShr()) { - __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); - // Shift the low part - __ Lsr(o_l, low, o_h); - // Shift the high part and `or` what underflew on the low part - __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); - __ Lsl(temp, high, temp); - __ orr(o_l, o_l, ShifterOperand(temp)); - // If the shift is > 32 bits, override the low part - __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); - __ it(PL); - __ Asr(o_l, high, temp, PL); - // Shift the high part - __ Asr(o_h, high, o_h); + if (second.IsRegister()) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + Register second_reg = second.AsRegister<Register>(); + + if (op->IsShl()) { + __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue)); + // Shift the high part + __ Lsl(o_h, high, o_l); + // Shift the low part and `or` what overflew on the high part + __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord)); + __ Lsr(temp, low, temp); + __ orr(o_h, o_h, ShifterOperand(temp)); + // If the shift is > 32 bits, override the high part + __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord)); + __ it(PL); + __ Lsl(o_h, low, temp, PL); + // Shift the low part + __ Lsl(o_l, low, o_l); + } else if (op->IsShr()) { + __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); + // Shift the low part + __ Lsr(o_l, low, o_h); + // Shift the high part and `or` what underflew on the low part + __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); + __ Lsl(temp, high, temp); + __ orr(o_l, o_l, ShifterOperand(temp)); + // If the shift is > 32 bits, override the low part + __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); + __ it(PL); + __ Asr(o_l, high, temp, PL); + // Shift the high part + __ Asr(o_h, high, o_h); + } else { + __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); + // same as Shr except we use `Lsr`s and not `Asr`s + __ Lsr(o_l, low, o_h); + __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); + __ Lsl(temp, high, temp); + __ orr(o_l, o_l, ShifterOperand(temp)); + __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); + __ it(PL); + __ Lsr(o_l, high, temp, PL); + __ Lsr(o_h, high, o_h); + } } else { - __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue)); - // same as Shr except we use `Lsr`s and not `Asr`s - __ Lsr(o_l, low, o_h); - __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord)); - __ Lsl(temp, high, temp); - __ orr(o_l, o_l, ShifterOperand(temp)); - __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord)); - 
__ it(PL); - __ Lsr(o_l, high, temp, PL); - __ Lsr(o_h, high, o_h); + // Register allocator doesn't create partial overlap. + DCHECK_NE(o_l, high); + DCHECK_NE(o_h, low); + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = static_cast<uint32_t>(cst & kMaxLongShiftValue); + if (shift_value > 32) { + if (op->IsShl()) { + __ Lsl(o_h, low, shift_value - 32); + __ LoadImmediate(o_l, 0); + } else if (op->IsShr()) { + __ Asr(o_l, high, shift_value - 32); + __ Asr(o_h, high, 31); + } else { + __ Lsr(o_l, high, shift_value - 32); + __ LoadImmediate(o_h, 0); + } + } else if (shift_value == 32) { + if (op->IsShl()) { + __ mov(o_h, ShifterOperand(low)); + __ LoadImmediate(o_l, 0); + } else if (op->IsShr()) { + __ mov(o_l, ShifterOperand(high)); + __ Asr(o_h, high, 31); + } else { + __ mov(o_l, ShifterOperand(high)); + __ LoadImmediate(o_h, 0); + } + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ adc(o_h, high, ShifterOperand(high)); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; + if (op->IsShl()) { + __ Lsl(o_h, high, shift_value); + __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value)); + __ Lsl(o_l, low, shift_value); + } else if (op->IsShr()) { + __ Lsr(o_l, low, shift_value); + __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value)); + __ Asr(o_h, high, shift_value); + } else { + __ Lsr(o_l, low, shift_value); + __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value)); + __ Lsr(o_h, high, shift_value); + } + } } break; } default: LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); } } @@ -3118,20 +3456,19 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
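+ // The entry point's arguments are already in place: the LocationSummary above pins
+ // inputs 0 and 1 to the first two runtime calling-convention registers, matching the
+ // (uint32_t type_idx, ArtMethod*) shape checked below, so the explicit LoadImmediate
+ // of the type index is no longer needed here.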
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -3153,6 +3490,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -3334,6 +3672,9 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, Register out_lo, Register out_hi) { if (offset != 0) { + // Ensure `out_lo` is different from `addr`, so that loading + // `offset` into `out_lo` does not clutter `addr`. + DCHECK_NE(out_lo, addr); __ LoadImmediate(out_lo, offset); __ add(IP, addr, ShifterOperand(out_lo)); addr = IP; @@ -3521,14 +3862,26 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong); + // The output overlaps in case of volatile long: we don't want the + // code generated by GenerateWideAtomicLoad to overwrite the + // object's location. Likewise, in the case of an object field get + // with read barriers enabled, we do not want the load to overwrite + // the object's location, as we need it to emit the read barrier. 
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + object_field_get_with_read_barrier; if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -3594,7 +3947,8 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); @@ -3674,7 +4028,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -3818,20 +4172,31 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -3894,8 +4259,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -3958,8 +4324,17 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -3968,11 +4343,16 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(value_type)) { @@ -3980,7 +4360,6 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { } else { locations->SetInAt(2, Location::RequiresRegister()); } - if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
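+ // Note on the read-barrier call sites in the field/array get paths above:
+ // MaybeGenerateReadBarrier is given enough information for the slow path to recompute
+ // the address of the reference that was just loaded, conceptually
+ //   ref_addr = obj + offset                             // field and Unsafe accesses
+ //   ref_addr = obj + data_offset + (index << TIMES_4)   // array accesses
+ // which mirrors the address computation performed by ReadBarrierForHeapReferenceSlowPathARM
+ // before it conceptually calls pReadBarrierSlow(ref, obj, offset), per the
+ // CheckEntrypointTypes above.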
@@ -3990,10 +4369,11 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4030,7 +4410,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); + Location value_loc = locations->InAt(2); + Register value = value_loc.AsRegister<Register>(); Register source = value; if (instruction->InputAt(2)->IsNullConstant()) { @@ -4044,6 +4425,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4056,7 +4439,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Label done; SlowPathCode* slow_path = nullptr; - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4076,23 +4459,63 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - __ LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // No need to poison/unpoison, we're comparing two poisoined references. - __ cmp(temp1, ShifterOperand(temp2)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Label do_put; - __ b(&do_put, EQ); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to poison/unpoison, we're comparing against null. 
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp1); + // // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); + // + // __ cmp(temp1, ShifterOperand(temp2)); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ b(slow_path->GetEntryLabel()); } else { - __ b(slow_path->GetEntryLabel(), NE); + // /* HeapReference<Class> */ temp1 = array->klass_ + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } } } @@ -4116,7 +4539,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, source, IP, data_offset); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4545,7 +4968,8 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(R0)); + Location::RegisterLocation(R0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { @@ -4556,33 +4980,59 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } } else { - DCHECK(cls->CanCallRuntime()); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - // TODO: We will need a read barrier here. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -4628,13 +5078,35 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. 
+ + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -4673,45 +5145,50 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(R0)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. 
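+ // Specifically, the hierarchy-walking checks (abstract class, class hierarchy and
+ // array-object checks) overwrite `out` while loading super/component classes, so the
+ // previous reference must first be copied into this temp to serve as the source of the
+ // read barrier, as done in the corresponding cases of the instruction code generator below.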
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4725,15 +5202,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ CompareAndBranchIfZero(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ LoadFromOffset(kLoadWord, target, obj, class_offset); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ LoadFromOffset(kLoadWord, out, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4744,13 +5215,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -4761,14 +5242,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop, success; __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -4779,14 +5270,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. Label exact_check; __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -4797,11 +5298,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kArrayCheck: { __ cmp(out, ShifterOperand(cls)); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); @@ -4810,13 +5312,25 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. 
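+ // Instead, the check is performed from TypeCheckSlowPathARM with live registers saved
+ // around the call, presumably via the same pInstanceofNonTrivial entry point that the
+ // removed code above invoked directly.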
+ DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ b(&done); } @@ -4842,57 +5356,61 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->WillCall() - ? 
Register(kNoRegister) - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); Label done; // Avoid null check if we know obj is not null. @@ -4900,76 +5418,159 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(obj, &done); } - if (locations->WillCall()) { - __ LoadFromOffset(kLoadWord, obj, obj, class_offset); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ cmp(temp, ShifterOperand(cls)); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ b(slow_path->GetEntryLabel(), NE); + __ b(type_check_slow_path->GetEntryLabel(), NE); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - Label loop; + Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ CompareAndBranchIfNonZero(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ cmp(temp, ShifterOperand(cls)); __ b(&loop, NE); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ CompareAndBranchIfNonZero(temp, &loop); - // Jump to the slow path to throw the exception. - __ b(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + Label check_non_primitive_component_type; __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. 
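// A minimal standalone sketch of the decision this kArrayObjectCheck path implements: the cast
// succeeds on an exact class match, or when the object's class is an array (non-null component
// type) whose component type is not a primitive. `Klass` is an illustrative stand-in for
// mirror::Class, not the ART type.
#include <cstdint>

struct Klass {
  const Klass* component_type;  // nullptr if this is not an array class.
  uint16_t primitive_type;      // 0 (kPrimNot) for reference types.
};

bool PassesArrayObjectCheck(const Klass* klass, const Klass* target) {
  if (klass == target) {
    return true;  // Exact match, corresponds to the early branch to `done`.
  }
  if (klass->component_type == nullptr) {
    return false;  // Not an array: the emitted code jumps to the slow path.
  }
  // Primitive arrays (e.g. int[]) are rejected; only object arrays pass.
  return klass->component_type->primitive_type == 0;
}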
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); + __ CompareAndBranchIfZero(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ b(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -4985,6 +5586,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } @@ -5143,19 +5749,85 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. 
This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { - if (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) { - // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod. - return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; - } if (desired_dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { const DexFile& outer_dex_file = GetGraph()->GetDexFile(); @@ -5178,6 +5850,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr return desired_dispatch_info; } +Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. 
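// A standalone sketch of the Baker-style fast path outlined in the comment inside
// GenerateReadBarrier above, written as plain C++ rather than emitted ARM code. Mark(), the lock
// word bit and the plain loads are illustrative placeholders; only the required load-load
// ordering is the point here.
#include <atomic>
#include <cstdint>

struct Object {};

static Object* Mark(Object* ref) { return ref; }  // Placeholder for the collector's mark/forward.

constexpr uint32_t kIllustrativeReadBarrierBit = 0x10000000u;  // Made-up lock word bit.

static Object* LoadWithBakerReadBarrier(const uint32_t* lock_word_addr,
                                        Object* const* field_addr) {
  bool is_gray = (*lock_word_addr & kIllustrativeReadBarrierBit) != 0;
  // Keep the lock word load ordered before the reference load (the real code would use a load
  // fence or an artificial address dependence).
  std::atomic_thread_fence(std::memory_order_acquire);
  Object* ref = *field_addr;  // The original reference load.
  return is_gray ? Mark(ref) : ref;
}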
+ SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // For better instruction scheduling we load the direct code pointer before the method pointer. switch (invoke->GetCodePtrLocation()) { @@ -5200,7 +5898,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -5209,13 +5907,17 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadLiteral(temp.AsRegister<Register>(), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement this type. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HArmDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -5226,10 +5928,11 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } - // temp = current_method->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset( - kArmPointerSize).Int32Value()); + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + __ LoadFromOffset(kLoadWord, + reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); // temp = temp[index_in_cache] uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index; __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); @@ -5270,13 +5973,24 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp Register temp = temp_location.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); - 
LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( @@ -5290,7 +6004,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); - size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size(); + size_t size = + method_patches_.size() + + call_patches_.size() + + relative_call_patches_.size() + + /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -5316,6 +6034,28 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche info.target_method.dex_file, info.target_method.dex_method_index)); } + for (const auto& pair : dex_cache_arrays_base_labels_) { + HArmDexCacheArraysBase* base = pair.first; + const DexCacheArraysBaseLabels* labels = &pair.second; + const DexFile& dex_file = base->GetDexFile(); + size_t base_element_offset = base->GetElementOffset(); + DCHECK(labels->add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position()); + // Add MOVW patch. + DCHECK(labels->movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + // Add MOVT patch. 
+ DCHECK(labels->movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + } } Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method, @@ -5427,6 +6167,23 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } } +void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); + codegen_->AddDexCacheArraysBase(base); +} + +void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + Register base_reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base); + __ BindTrackedLabel(&labels->movw_label); + __ movw(base_reg, 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(base_reg, 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(base_reg, base_reg, ShifterOperand(PC)); +} + void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { DCHECK(type == Primitive::kPrimVoid); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index cef1095c5d..193add2541 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -228,15 +228,13 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateCompareWithImmediate(Register left, int32_t right); - void GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, + void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void DivRemOneOrMinusOne(HBinaryOperation* instruction); @@ -375,8 +373,83 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // The PC-relative base address is loaded with three instructions, MOVW+MOVT + // to load the offset to base_reg and then ADD base_reg, PC. The offset is + // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we + // currently emit these 3 instructions together, instruction scheduling could + // split this sequence apart, so we keep separate labels for each of them. 
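// A standalone sketch of the offset arithmetic behind the MOVW/MOVT + ADD PC sequence patched
// above: the linker fills the two 16-bit immediates with the distance from the ADD's effective
// PC (the ADD's address plus 4 in Thumb2 state) to the dex cache arrays element. Addresses and
// names below are made up for illustration.
#include <cstdint>

struct MovwMovtImmediates {
  uint16_t movw_imm;  // Low half of the offset, loaded first.
  uint16_t movt_imm;  // High half of the offset, written to the top 16 bits.
};

MovwMovtImmediates ComputePcRelativeImmediates(uint32_t add_pc_instruction_address,
                                               uint32_t target_address) {
  uint32_t effective_pc = add_pc_instruction_address + 4u;  // Thumb2 PC read.
  uint32_t offset = target_address - effective_pc;
  return { static_cast<uint16_t>(offset & 0xffffu),
           static_cast<uint16_t>(offset >> 16) };
}
// After "movw base, #movw_imm; movt base, #movt_imm; add base, base, pc",
// `base` holds target_address.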
+ struct DexCacheArraysBaseLabels { + DexCacheArraysBaseLabels() = default; + DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default; + + Label movw_label; + Label movt_label; + Label add_pc_label; + }; + + void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) { + DexCacheArraysBaseLabels labels; + dex_cache_arrays_base_labels_.Put(base, std::move(labels)); + } + + DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) { + auto it = dex_cache_arrays_base_labels_.find(base); + DCHECK(it != dex_cache_arrays_base_labels_.end()); + return &it->second; + } + + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*, + DexCacheArraysBaseLabels, + std::less<HArmDexCacheArraysBase*>>; Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); @@ -398,6 +471,8 @@ class CodeGeneratorARM : public CodeGenerator { // Using ArenaDeque<> which retains element addresses on push/emplace_back(). 
ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b0be446174..04acd9d32c 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces) namespace art { +template<class MirrorType> +class GcRoot; + namespace arm64 { using helpers::CPURegisterFrom; @@ -68,6 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; +// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// table version generates 7 instructions and num_entries literals. Compare/jump sequence will +// generates less code/data with a small num_entries. +static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -427,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = InputRegisterAt(instruction_, 0); - Register temp = WRegisterFrom(locations->GetTemp(0)); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ Ldr(temp, HeapOperand(obj, class_offset)); - arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -450,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, - const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -490,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -545,6 +544,293 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; +void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { + uint32_t num_entries = switch_instr_->GetNumEntries(); + DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + + // We are about to use the assembler to place literals directly. Make sure we have enough + // underlying code buffer and we have generated the jump table with right size. 
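// A standalone sketch of the table layout the loop below emits: each entry is a signed 32-bit
// offset from the start of the table to a successor block, so the (separately emitted) dispatch
// code computes table_start + entries[index]. The struct is illustrative, not the VIXL/ART
// representation.
#include <cstdint>
#include <vector>

struct PackedSwitchJumpTableSketch {
  uint32_t table_start;          // Code address where the literals begin.
  std::vector<int32_t> entries;  // One offset per switch case, in case order.

  uint32_t TargetOf(uint32_t index) const {
    return table_start + static_cast<uint32_t>(entries[index]);
  }
};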
+ CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t), + CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize); + + __ Bind(&table_start_); + const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + vixl::Label* target_label = codegen->GetLabelOf(successors[i]); + DCHECK(target_label->IsBound()); + ptrdiff_t jump_offset = target_label->location() - table_start_.location(); + DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min()); + DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max()); + Literal<int32_t> literal(jump_offset); + __ place(&literal); + } +} + +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ Ldr(out, HeapOperand(out, class_offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + + // Note: In the case of a HArrayGet instruction, when the base + // address is a HArm64IntermediateAddress instruction, it does not + // point to the array object itself, but to an offset within this + // object. However, the read barrier entry point needs the array + // object address to be passed as first argument. So we + // temporarily set back `obj_` to that address, and restore its + // initial value later. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Sub(obj_reg, obj_reg, offset_); + } + + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. 
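// A standalone sketch of the index adjustment performed a few lines below for HArrayGet: the
// read barrier entry point expects a byte offset, so the element index is scaled by the
// component size and the data offset is added.
#include <cstdint>

uint32_t ElementByteOffset(uint32_t index,
                           uint32_t component_size_shift,  // 2 for 32-bit heap references.
                           uint32_t data_offset) {
  return (index << component_size_shift) + data_offset;
}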
+ Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); + if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { + // We are about to change the value of `index_reg` (see the + // calls to vixl::MacroAssembler::Lsl and + // vixl::MacroAssembler::Mov below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg.W(), index_reg); + index_reg = free_reg; + index = LocationFrom(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Add(index_reg, index_reg, Operand(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
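// Why a parallel move resolver is needed for the argument set-up below: the sources and
// destinations of the moves may overlap (e.g. x <- y and y <- x), so emitting them naively in
// order can clobber a value that is still needed. A minimal standalone illustration of the
// problem and the classic fix:
void NaiveSwapMoves(int& x, int& y) {
  x = y;
  y = x;  // Bug: y receives the already-overwritten x.
}

void ResolvedSwapMoves(int& x, int& y) {
  int scratch = x;  // A resolver detects the cycle and uses a scratch location.
  x = y;
  y = scratch;
}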
+ InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + LocationFrom(calling_convention.GetRegisterAt(0)), + type, + nullptr); + parallel_move.AddMove(obj_, + LocationFrom(calling_convention.GetRegisterAt(1)), + type, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); + } + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Restore the value of `obj_` when it corresponds to a + // HArm64IntermediateAddress instruction. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Add(obj_reg, obj_reg, offset_); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(XRegisterFrom(ref_).code()); + size_t obj = static_cast<int>(XRegisterFrom(obj_).code()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return Register(VIXLRegCodeFromART(i), kXRegSize); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); +}; + +// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // The argument of the ReadBarrierForRootSlow is not a managed + // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; + // thus we need a 64-bit move here, and we cannot use + // + // arm64_codegen->MoveLocation( + // LocationFrom(calling_convention.GetRegisterAt(0)), + // root_, + // type); + // + // which would emit a 32-bit move, as `type` is a (32-bit wide) + // reference type (`Primitive::kPrimNot`). + __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -587,6 +873,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, compiler_options, stats), block_labels_(nullptr), + jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -598,15 +885,21 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } -#undef __ #define __ GetVIXLAssembler()-> +void CodeGeneratorARM64::EmitJumpTables() { + for (auto jump_table : jump_tables_) { + jump_table->EmitTable(this); + } +} + void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { + EmitJumpTables(); // Ensure we emit the literal pool. 
__ FinalizeCode(); @@ -1368,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -1403,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W()); + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); } } @@ -1580,6 +1889,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + if (instruction->GetInstrKind() == HInstruction::kNeg) { + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + Primitive::Type type = instruction->GetType(); + HInstruction::InstructionKind kind = instruction->GetInstrKind(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + Register out = OutputRegister(instruction); + Register left; + if (kind != HInstruction::kNeg) { + left = InputRegisterAt(instruction, 0); + } + // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // shifter operand operation, the IR generating `right_reg` (input to the type + // conversion) can have a different type from the current instruction's type, + // so we manually indicate the type. + Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); + int64_t shift_amount = (type == Primitive::kPrimInt) + ? 
static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue) + : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue); + + Operand right_operand(0); + + HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); + } else { + right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + } + + // Logical binary operations do not support extension operations in the + // operand. Note that VIXL would still manage if it was passed by generating + // the extension as a separate instruction. + // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. + DCHECK(!right_operand.IsExtendedRegister() || + (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && + kind != HInstruction::kNeg)); + switch (kind) { + case HInstruction::kAdd: + __ Add(out, left, right_operand); + break; + case HInstruction::kAnd: + __ And(out, left, right_operand); + break; + case HInstruction::kNeg: + DCHECK(instruction->InputAt(0)->AsConstant()->IsZero()); + __ Neg(out, right_operand); + break; + case HInstruction::kOr: + __ Orr(out, left, right_operand); + break; + case HInstruction::kSub: + __ Sub(out, left, right_operand); + break; + case HInstruction::kXor: + __ Eor(out, left, right_operand); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << kind; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1595,23 +1980,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. 
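// A standalone sketch of the Cortex-A53 erratum 835769 workaround applied here: a single NOP is
// inserted when a load or store is immediately followed by a 64-bit multiply-accumulate. The
// enum and helper are illustrative only.
enum class InsnKind { kLoadOrStore, kMultiplyAccumulate64, kOther };

bool NeedsErratum835769Nop(InsnKind previous, InsnKind next) {
  return previous == InsnKind::kLoadOrStore &&
         next == InsnKind::kMultiplyAccumulate64;
}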
+ vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); - Location index = instruction->GetLocations()->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); + LocationSummary* locations = instruction->GetLocations(); + Location index = locations->InAt(1); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); CPURegister dest = OutputCPURegister(instruction); @@ -1643,8 +2080,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->Load(type, dest, source); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (instruction->GetType() == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(dest.W()); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + Location out = locations->Out(); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + // Note: when `obj_loc` is a HArm64IntermediateAddress, it does + // not contain the base address of the array object, which is + // needed by the read barrier entry point. So the read barrier + // slow path will temporarily set back `obj_loc` to the right + // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode). 
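// A standalone sketch of the address adjustment referred to above: an intermediate address is
// the array base plus the data offset, so the read barrier slow path recovers the array object
// by subtracting that offset before the runtime call and adding it back afterwards.
#include <cstdint>

uintptr_t ToIntermediateAddress(uintptr_t array_base, uint32_t data_offset) {
  return array_base + data_offset;
}

uintptr_t RecoverArrayBase(uintptr_t intermediate_address, uint32_t data_offset) {
  return intermediate_address - data_offset;  // What the slow path's Sub/Add pair undoes.
}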
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + } } } @@ -1662,12 +2113,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -1677,7 +2135,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -1691,7 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { BlockPoolsScope block_pools(masm); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); destination = HeapOperand(array, offset); @@ -1741,7 +2199,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -1756,26 +2214,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - Register temp2 = temps.AcquireSameSizeAs(array); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, component_offset)); - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ Cmp(temp, temp2); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::Label do_put; - __ B(eq, &do_put); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, super_offset)); - // No need to unpoison, we're comparing against null. 
- __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ Ldr(temp, HeapOperand(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); + // + // __ Cmp(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ B(slow_path->GetEntryLabel()); } else { - __ B(ne, slow_path->GetEntryLabel()); + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. 
+ __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); } - temps.Release(temp2); } if (kPoisonHeapReferences) { @@ -1791,7 +2289,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } __ Str(source, destination); - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -2283,38 +2781,56 @@ void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) } void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, vixl::Label* true_target, - vixl::Label* false_target, - vixl::Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); + vixl::Label* false_target) { + // FP branching requires both targets to be explicit. If either of the targets + // is nullptr (fallthrough) use and bind `fallthrough_target` instead. + vixl::Label fallthrough_target; + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ B(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ B(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(instruction, 0), true_target); + if (true_target == nullptr) { + __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); + } else { + __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. - Primitive::Type type = - cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); if (Primitive::IsFloatingPointType(type)) { - // FP compares don't like null false_targets. 
- if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor()); - } FPRegister lhs = InputFPRegisterAt(condition, 0); if (condition->GetLocations()->InAt(1).IsConstant()) { DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant())); @@ -2324,31 +2840,45 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct __ Fcmp(lhs, InputFPRegisterAt(condition, 1)); } if (condition->IsFPConditionTrueIfNaN()) { - __ B(vs, true_target); // VS for unordered. + __ B(vs, true_target == nullptr ? &fallthrough_target : true_target); } else if (condition->IsFPConditionFalseIfNaN()) { - __ B(vs, false_target); // VS for unordered. + __ B(vs, false_target == nullptr ? &fallthrough_target : false_target); + } + if (true_target == nullptr) { + __ B(ARM64Condition(condition->GetOppositeCondition()), false_target); + } else { + __ B(ARM64Condition(condition->GetCondition()), true_target); } - __ B(ARM64Condition(condition->GetCondition()), true_target); } else { // Integer cases. Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); - Condition arm64_cond = ARM64Condition(condition->GetCondition()); + + Condition arm64_cond; + vixl::Label* non_fallthrough_target; + if (true_target == nullptr) { + arm64_cond = ARM64Condition(condition->GetOppositeCondition()); + non_fallthrough_target = false_target; + } else { + arm64_cond = ARM64Condition(condition->GetCondition()); + non_fallthrough_target = true_target; + } + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { switch (arm64_cond) { case eq: - __ Cbz(lhs, true_target); + __ Cbz(lhs, non_fallthrough_target); break; case ne: - __ Cbnz(lhs, true_target); + __ Cbnz(lhs, non_fallthrough_target); break; case lt: // Test the sign bit and branch accordingly. - __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); break; case ge: // Test the sign bit and branch accordingly. - __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); break; default: // Without the `static_cast` the compiler throws an error for @@ -2357,43 +2887,43 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct } } else { __ Cmp(lhs, rhs); - __ B(arm64_cond, true_target); + __ B(arm64_cond, non_fallthrough_target); } } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
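// A minimal standalone sketch of the target-selection rule used above,
// with simplified stand-ins (Cond, Negate, Label) for HCondition,
// GetOppositeCondition() and vixl::Label; these names are illustrative,
// not ART or VIXL APIs.

enum class Cond { kEq, kNe, kLt, kGe };

Cond Negate(Cond c) {
  switch (c) {
    case Cond::kEq: return Cond::kNe;
    case Cond::kNe: return Cond::kEq;
    case Cond::kLt: return Cond::kGe;
    case Cond::kGe: return Cond::kLt;
  }
  return c;  // Unreachable; keeps the sketch warning-free.
}

struct Label {};
struct Branch { Cond cond; const Label* target; };

// A null target means "fall through to the next block". Case (1) branches
// to the false target on the opposite condition; cases (2) and (3) branch
// to the true target on the condition itself, and case (3) additionally
// needs the unconditional jump to `false_target` emitted afterwards.
Branch SelectConditionalBranch(Cond cond,
                               const Label* true_target,
                               const Label* false_target) {
  if (true_target == nullptr) {
    return {Negate(cond), false_target};
  }
  return {cond, true_target};
}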
+ if (true_target != nullptr && false_target != nullptr) { __ B(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } } void LocationsBuilderARM64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - vixl::Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } @@ -2402,8 +2932,10 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM64(deoptimize); codegen_->AddSlowPath(slow_path); - vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -2424,40 +2956,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); + Location out_loc = locations->Out(); Register out = OutputRegister(instruction); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); @@ -2473,15 +3009,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ Ldr(target, HeapOperand(obj.W(), class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ Ldr(out, HeapOperand(obj.W(), class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -2492,13 +3022,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
vixl::Label loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -2509,14 +3049,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -2527,14 +3077,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. vixl::Label exact_check; __ Cmp(out, cls); __ B(eq, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ Ldr(out, HeapOperand(out, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. 
__ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -2545,11 +3105,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ B(&done); break; } + case TypeCheckKind::kArrayCheck: { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -2558,13 +3119,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ B(&done); } @@ -2590,58 +3163,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM64 uses this register too. 
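// A minimal standalone model of the super-class walk generated by the
// kClassHierarchyCheck path above (kAbstractClassCheck is the same walk,
// minus the first comparison, since an abstract class can never be the
// object's exact class). The Class type is a stand-in for mirror::Class.

struct Class {
  const Class* super_class;  // What the super_offset load reads.
};

bool IsSubclassOf(const Class* klass, const Class* target) {
  // A null super class ends the walk, which corresponds to the emitted
  // code falling back to `done` (instanceof: result 0) or the slow path.
  for (const Class* k = klass; k != nullptr; k = k->super_class) {
    if (k == target) {
      return true;
    }
  }
  return false;
}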
- locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); - Register temp; - if (!locations->WillCall()) { - temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - } - + Location temp_loc = locations->GetTemp(0); + Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCodeARM64* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARM64* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); vixl::Label done; // Avoid null check if we know obj is not null. @@ -2649,76 +3226,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(obj, &done); } - if (locations->WillCall()) { - __ Ldr(obj, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(obj); - } else { - __ Ldr(temp, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ Cmp(temp, cls); // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ B(ne, slow_path->GetEntryLabel()); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::Label loop; + vixl::Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ Cbnz(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ Cmp(temp, cls); __ B(ne, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); __ B(eq, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ Cbnz(temp, &loop); - // Jump to the slow path to throw the exception. - __ B(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + vixl::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. 
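// A standalone model of the "non-primitive array" test mentioned above,
// with a simplified Class stand-in; kPrimNot is 0, as the static_assert
// further below also documents.

struct Class {
  const Class* component_type;  // Null when the class is not an array class.
  uint16_t primitive_type;      // 0 (kPrimNot) for reference components.
};

bool IsNonPrimitiveArrayClass(const Class* klass) {
  // The emitted code loads component_type_, bails to the slow path when it
  // is null, and then requires primitive_type_ to be kPrimNot.
  return klass->component_type != nullptr &&
         klass->component_type->primitive_type == 0;
}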
+ Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ Ldr(temp, HeapOperand(temp, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ Cbnz(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbz(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved + // and interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ B(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { @@ -2761,10 +3421,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
- Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + LocationSummary* locations = invoke->GetLocations(); + Register temp = XRegisterFrom(locations->GetTemp(0)); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); - Location receiver = invoke->GetLocations()->InAt(0); + Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -2776,14 +3437,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); + // /* HeapReference<Class> */ temp = temp->klass_ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2856,41 +3525,44 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint - __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset())); + __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. - __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress())); + __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: // Load method address from literal pool with a link-time patch. - __ Ldr(XRegisterFrom(temp).X(), + __ Ldr(XRegisterFrom(temp), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { // Add ADRP with its PC-relative DexCache access patch. 
- pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetDexCacheArrayOffset()); - vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label; + pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetDexCacheArrayOffset()); + vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label; { vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ adrp(XRegisterFrom(temp).X(), 0); + __ Bind(pc_insn_label); + __ adrp(XRegisterFrom(temp), 0); } - __ Bind(pc_insn_label); // Bind after ADRP. - pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label; + pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; // Add LDR with its PC-relative DexCache access patch. - pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetDexCacheArrayOffset()); - __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0)); - __ Bind(&pc_rel_dex_cache_patches_.back().label); // Bind after LDR. - pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label; + pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetDexCacheArrayOffset()); + { + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(&pc_relative_dex_cache_patches_.back().label); + __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0)); + pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; + } break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = XRegisterFrom(temp); Register method_reg; if (current_method.IsRegister()) { @@ -2902,7 +3574,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } - // temp = current_method->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ Ldr(reg.X(), MemOperand(method_reg.X(), ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value())); @@ -2920,8 +3592,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { relative_call_patches_.emplace_back(invoke->GetTargetMethod()); vixl::Label* label = &relative_call_patches_.back().label; - __ Bl(label); // Arbitrarily branch to the instruction after BL, override at link time. - __ Bind(label); // Bind after BL. + vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(label); + __ bl(0); // Branch and link to itself. This will be overriden at link time. 
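// A rough standalone model of the address arithmetic the linker later
// patches into the ADRP/LDR pair above (assumed simplification; the real
// encoding is produced by the ARM64 relative patcher):

#include <cstdint>

// ADRP ends up holding the 4 KiB page base of the target: its immediate is
// the page delta from the ADRP's own PC, which is why the label is now
// bound on the instruction itself inside the emission check scope.
uint64_t AdrpResult(uint64_t target_address) {
  return target_address & ~UINT64_C(0xfff);
}

// The following LDR contributes the low 12 bits of the target as its
// immediate offset, completing the absolute address.
uint64_t PatchedLoadAddress(uint64_t target_address) {
  return AdrpResult(target_address) + (target_address & UINT64_C(0xfff));
}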
break; } case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: @@ -2934,7 +3607,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_; __ Ldr(lr, MemOperand( - XRegisterFrom(callee_method).X(), + XRegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); // lr() __ Blr(lr); @@ -2945,8 +3618,12 @@ } void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); Register temp = XRegisterFrom(temp_in); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); @@ -2956,8 +3633,15 @@ BlockPoolsScope block_pools(GetVIXLAssembler()); DCHECK(receiver.IsRegister()); - __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2973,7 +3657,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc method_patches_.size() + call_patches_.size() + relative_call_patches_.size() + - pc_rel_dex_cache_patches_.size(); + pc_relative_dex_cache_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -2990,14 +3674,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_method.dex_method_index)); } for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) { - linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u, + linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(), info.target_method.dex_file, info.target_method.dex_method_index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) { - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u, + for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(), &info.target_dex_file, - info.pc_insn_label->location() - 4u, + info.pc_insn_label->location(), info.element_offset)); } } @@ -3070,7 +3754,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0)); + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3080,30 +3765,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } + Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. 
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3149,12 +3860,35 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); + Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
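// A minimal standalone model of the GC-root accesses generated above for
// the declaring class and the dex cache arrays. GcRoot and the slow-path
// callback are simplified stand-ins, not the real runtime types.

struct Class {};

struct GcRoot {
  Class* reference;
};

Class* ReadClassRoot(GcRoot* root,
                     bool emit_read_barrier,
                     Class* (*read_barrier_for_root_slow)(GcRoot*)) {
  if (emit_read_barrier) {
    // With read barriers, the code computes the *address* of the root (the
    // Add instruction) and lets the runtime return the, possibly forwarded,
    // reference.
    return read_barrier_for_root_slow(root);
  }
  // Without read barriers, a plain load of the root (the Ldr) suffices.
  return root->reference;
}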
+ + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + __ Cbz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3189,7 +3923,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -3278,8 +4016,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -3301,17 +4037,12 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); - DCHECK(type_index.Is(w0)); - __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -3488,6 +4219,11 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } @@ -3732,9 +4468,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { - __ Ubfx(output, source, 0, result_size * kBitsPerByte); - } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -3743,9 +4477,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. __ Mov(output.W(), source.W()); - } else if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + __ Ubfx(output, + output.IsX() ? source.X() : source.W(), + 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } @@ -3810,29 +4546,152 @@ void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); Register value_reg = InputRegisterAt(switch_instr, 0); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); + // Roughly set 16 as max average assemblies generated per HIR in a graph. + static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize; + // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to + // make sure we don't emit it if the target may run out of range. + // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR + // ranges and emit the tables only as required. + static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; + + if (num_entries < kPackedSwitchJumpTableThreshold || + // Current instruction id is an upper bound of the number of HIRs in the graph. + GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { + // Create a series of compare/jumps. 
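// Back-of-the-envelope check of the thresholds above, assuming A64's
// 4-byte instruction size and the +/-1 MiB ADR range:

#include <cstdint>

constexpr int32_t kA64InstructionSize = 4;  // bytes
constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kA64InstructionSize;  // 64 bytes
constexpr int32_t kMiB = 1024 * 1024;
constexpr int32_t kJumpTableInstructionThreshold = kMiB / kMaxExpectedSizePerHInstruction;

// A graph with more HIRs than this could emit over 1 MiB of code, so the
// ADR that materializes the table address might not reach the table.
static_assert(kJumpTableInstructionThreshold == 16384, "1 MiB / 64 bytes per HIR");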
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + vixl::Label* succ = codegen_->GetLabelOf(successors[i]); + if (case_value == 0) { + __ Cbz(value_reg, succ); + } else { + __ Cmp(value_reg, Operand(case_value)); + __ B(eq, succ); + } + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ B(codegen_->GetLabelOf(default_block)); + } + } else { + JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr); + codegen_->AddJumpTable(jump_table); + + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + + // Below instructions should use at most one blocked register. Since there are two blocked + // registers, we are free to block one. + Register temp_w = temps.AcquireW(); + Register index; + // Remove the bias. + if (lower_bound != 0) { + index = temp_w; + __ Sub(index, value_reg, Operand(lower_bound)); } else { - __ Cmp(value_reg, vixl::Operand(case_value)); - __ B(eq, succ); + index = value_reg; } - } - // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + // Jump to default block if index is out of the range. + __ Cmp(index, Operand(num_entries)); + __ B(hs, codegen_->GetLabelOf(default_block)); + + // In current VIXL implementation, it won't require any blocked registers to encode the + // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the + // register pressure. + Register table_base = temps.AcquireX(); + // Load jump offset from the table. + __ Adr(table_base, jump_table->GetTableStartLabel()); + Register jump_offset = temp_w; + __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2)); + + // Jump to target block by branching to table_base(pc related) + offset. + Register target_address = table_base; + __ Add(target_address, table_base, Operand(jump_offset, SXTW)); + __ Br(target_address); + } +} + +void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); } } +void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index ab684ea538..7950f078ad 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -81,6 +81,22 @@ class SlowPathCodeARM64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64); }; +class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> { + public: + explicit JumpTableARM64(HPackedSwitch* switch_instr) + : switch_instr_(switch_instr), table_start_() {} + + vixl::Label* GetTableStartLabel() { return &table_start_; } + + void EmitTable(CodeGeneratorARM64* codegen); + + private: + HPackedSwitch* const switch_instr_; + vixl::Label table_start_; + + DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); +}; + static const vixl::Register kRuntimeParameterCoreRegisters[] = { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 }; static constexpr size_t kRuntimeParameterCoreRegistersLength = @@ -203,9 +219,9 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, vixl::Label* true_target, - vixl::Label* false_target, - vixl::Label* always_true_target); + vixl::Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -358,6 +374,10 @@ class CodeGeneratorARM64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<vixl::Label>(); } + void AddJumpTable(JumpTableARM64* jump_table) { + jump_tables_.push_back(jump_table); + } + void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. 
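// A standalone sketch of the Baker-style fast path described in the TODO
// inside CodeGeneratorARM64::GenerateReadBarrier above. The lock word
// layout, the mask value and Mark() are assumed stand-ins, not the real
// runtime definitions.

#include <atomic>
#include <cstdint>

struct Object {
  uint32_t lock_word;
  Object* field;
};

constexpr uint32_t kReadBarrierStateMask = 0x10000000u;  // Assumed bit position.

Object* Mark(Object* ref) {
  // Placeholder: the real slow path marks `ref` and may return a to-space
  // forwarded reference.
  return ref;
}

Object* ReadFieldWithBakerBarrier(Object* obj) {
  const bool is_gray = (obj->lock_word & kReadBarrierStateMask) != 0;
  // Load fence (or an artificial address dependence) so the lock word load
  // is ordered before the reference load, as the comment requires.
  std::atomic_thread_fence(std::memory_order_acquire);
  Object* ref = obj->field;  // The original load.
  if (is_gray) {
    ref = Mark(ref);
  }
  return ref;
}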
@@ -404,6 +424,51 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, @@ -422,15 +487,16 @@ class CodeGeneratorARM64 : public CodeGenerator { const DexFile& target_dex_file; uint32_t element_offset; - // NOTE: Labels are bound to the end of the patched instruction because - // we don't know if there will be a veneer or how big it will be. vixl::Label label; vixl::Label* pc_insn_label; }; + void EmitJumpTables(); + // Labels for each block that will be compiled. vixl::Label* block_labels_; // Indexed by block id. vixl::Label frame_entry_label_; + ArenaVector<JumpTableARM64*> jump_tables_; LocationsBuilderARM64 location_builder_; InstructionCodeGeneratorARM64 instruction_visitor_; @@ -447,7 +513,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Using ArenaDeque<> which retains element addresses on push/emplace_back(). ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_; + ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 29d08beb97..9dc9167824 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -40,12 +40,8 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. 
-static constexpr Register TMP = T8; static constexpr FRegister FTMP = F8; -// ART Thread Register. -static constexpr Register TR = S1; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -419,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -465,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickDeoptimize)); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -1732,12 +1727,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) { } void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); - bool is_object = value_type == Primitive::kPrimNot; + bool needs_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - is_object ? LocationSummary::kCall : LocationSummary::kNoCall); - if (is_object) { + needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall); + if (needs_runtime_call) { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -2425,30 +2419,51 @@ void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary) } void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, MipsLabel* true_target, - MipsLabel* false_target, - MipsLabel* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); + MipsLabel* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. 
+ if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ B(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ B(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Bnez(cond_val.AsRegister<Register>(), true_target); + if (true_target == nullptr) { + __ Beqz(cond_val.AsRegister<Register>(), false_target); + } else { + __ Bnez(cond_val.AsRegister<Register>(), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>(); Location rhs_location = condition->GetLocations()->InAt(1); Register rhs_reg = ZERO; @@ -2460,37 +2475,46 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi rhs_reg = rhs_location.AsRegister<Register>(); } - IfCondition if_cond = condition->GetCondition(); + IfCondition if_cond; + MipsLabel* non_fallthrough_target; + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + non_fallthrough_target = false_target; + } else { + if_cond = condition->GetCondition(); + non_fallthrough_target = true_target; + } + if (use_imm && rhs_imm == 0) { switch (if_cond) { case kCondEQ: - __ Beqz(lhs, true_target); + __ Beqz(lhs, non_fallthrough_target); break; case kCondNE: - __ Bnez(lhs, true_target); + __ Bnez(lhs, non_fallthrough_target); break; case kCondLT: - __ Bltz(lhs, true_target); + __ Bltz(lhs, non_fallthrough_target); break; case kCondGE: - __ Bgez(lhs, true_target); + __ Bgez(lhs, non_fallthrough_target); break; case kCondLE: - __ Blez(lhs, true_target); + __ Blez(lhs, non_fallthrough_target); break; case kCondGT: - __ Bgtz(lhs, true_target); + __ Bgtz(lhs, non_fallthrough_target); break; case kCondB: break; // always false case kCondBE: - __ Beqz(lhs, true_target); // <= 0 if zero + __ Beqz(lhs, non_fallthrough_target); // <= 0 if zero break; case kCondA: - __ Bnez(lhs, true_target); // > 0 if non-zero + __ Bnez(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(true_target); // always true + __ B(non_fallthrough_target); // always true break; } } else { @@ -2501,81 +2525,78 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi } switch (if_cond) { case kCondEQ: - __ Beq(lhs, rhs_reg, true_target); + __ Beq(lhs, rhs_reg, non_fallthrough_target); break; case kCondNE: - __ Bne(lhs, rhs_reg, true_target); + __ Bne(lhs, rhs_reg, non_fallthrough_target); break; case kCondLT: - __ Blt(lhs, rhs_reg, true_target); + __ Blt(lhs, rhs_reg, non_fallthrough_target); break; case kCondGE: - __ 
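The three emission patterns listed in the comments above can be summarized in a standalone sketch; this is only a model, with `emit_cond` and `emit_jump` as hypothetical stand-ins for the assembler's conditional and unconditional branch emitters:

using EmitBranch = void (*)(const char* target);

// condition_is_constant / constant_value model the IsIntConstant() fast path.
void TestAndBranch(bool condition_is_constant, bool constant_value,
                   const char* true_target, const char* false_target,
                   EmitBranch emit_cond, EmitBranch emit_jump) {
  if (true_target == nullptr && false_target == nullptr) {
    return;  // Both successors fall through: nothing to emit.
  }
  if (condition_is_constant) {
    if (constant_value && true_target != nullptr) emit_jump(true_target);
    if (!constant_value && false_target != nullptr) emit_jump(false_target);
    return;
  }
  if (true_target == nullptr) {
    emit_cond(false_target);    // Case (1): branch on the opposite condition.
  } else {
    emit_cond(true_target);     // Cases (2) and (3): branch on the condition.
    if (false_target != nullptr) {
      emit_jump(false_target);  // Case (3): the false side is not the fallthrough.
    }
  }
}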
Bge(lhs, rhs_reg, true_target); + __ Bge(lhs, rhs_reg, non_fallthrough_target); break; case kCondLE: - __ Bge(rhs_reg, lhs, true_target); + __ Bge(rhs_reg, lhs, non_fallthrough_target); break; case kCondGT: - __ Blt(rhs_reg, lhs, true_target); + __ Blt(rhs_reg, lhs, non_fallthrough_target); break; case kCondB: - __ Bltu(lhs, rhs_reg, true_target); + __ Bltu(lhs, rhs_reg, non_fallthrough_target); break; case kCondAE: - __ Bgeu(lhs, rhs_reg, true_target); + __ Bgeu(lhs, rhs_reg, non_fallthrough_target); break; case kCondBE: - __ Bgeu(rhs_reg, lhs, true_target); + __ Bgeu(rhs_reg, lhs, non_fallthrough_target); break; case kCondA: - __ Bltu(rhs_reg, lhs, true_target); + __ Bltu(rhs_reg, lhs, non_fallthrough_target); break; } } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { __ B(false_target); } } void LocationsBuilderMIPS::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { - MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - MipsLabel* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS(deoptimize); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize); codegen_->AddSlowPath(slow_path); - MipsLabel* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { @@ -2616,6 +2637,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2646,8 +2668,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (is_volatile && load_type == kLoadDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2670,21 +2691,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + if (obj == dst) { + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst, obj, offset); + } else { + __ LoadFromOffset(kLoadWord, dst, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); + __ LoadFromOffset(load_type, dst, obj, offset); } - __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadSFromOffset(dst, obj, offset); } else { - __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadDFromOffset(dst, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are 
handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (is_volatile) { @@ -2730,6 +2764,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2760,8 +2795,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (is_volatile && store_type == kStoreDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2784,21 +2818,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); + Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); + __ StoreToOffset(kStoreWord, src, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); + __ StoreToOffset(store_type, src, obj, offset); } - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreSToOffset(src, obj, offset); } else { - __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreDToOffset(src, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } // TODO: memory barriers? 
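A minimal sketch of what the split 64-bit field accesses above compute, assuming little-endian layout (low word at the lower address, matching the generated kLoadWord/kStoreWord pairs); the generated load additionally reverses the order of its two word loads when the base register doubles as the low half of the destination, so the base is not clobbered before the second load:

#include <cstdint>
#include <cstring>

// `base` stands in for the object register, `offset` for the field offset.
uint64_t GetLongField(const uint8_t* base, uint32_t offset) {
  uint32_t lo, hi;
  std::memcpy(&lo, base + offset, sizeof(lo));      // first access doubles as the implicit null check
  std::memcpy(&hi, base + offset + 4, sizeof(hi));
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

void SetLongField(uint8_t* base, uint32_t offset, uint64_t value) {
  uint32_t lo = static_cast<uint32_t>(value);
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  std::memcpy(base + offset, &lo, sizeof(lo));
  std::memcpy(base + offset + 4, &hi, sizeof(hi));
}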
@@ -3009,7 +3050,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -3021,7 +3062,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); Register method_reg; if (current_method.IsRegister()) { @@ -3148,6 +3189,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { cls->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3159,21 +3201,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3456,17 +3503,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - Register current_method_register = calling_convention.GetRegisterAt(1); - __ Lw(current_method_register, SP, 
kCurrentMethodStackOffset); - // Move an uint16_t value to a register. - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -3683,7 +3725,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmodf)); - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { @@ -3691,7 +3733,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmod)); - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } default: diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 059131dcfc..e3a2cb40ef 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -226,9 +226,9 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, MipsLabel* true_target, - MipsLabel* false_target, - MipsLabel* always_true_target); + MipsLabel* false_target); void HandleGoto(HInstruction* got, HBasicBlock* successor); MipsAssembler* const assembler_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 55efd5f9de..bc5eb31405 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,19 +16,19 @@ #include "code_generator_mips64.h" +#include "art_method.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_mips64.h" -#include "art_method.h" -#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class 
SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. - Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { instruction_, dex_pc, this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -420,7 +419,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, : CodeGenerator(graph, kNumberOfGpuRegisters, kNumberOfFpuRegisters, - 0, // kNumberOfRegisterPairs + /* number_of_register_pairs */ 0, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), @@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. 
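A rough model of why Finalize() above remaps recorded native pcs: when FinalizeCode() grows a short branch into a longer sequence, every code offset past that point slides forward, so positions captured during generation must be adjusted. The real bookkeeping lives in the assembler's GetAdjustedPosition; the map-based version below is only an illustration:

#include <cstdint>
#include <map>

// `expansions` maps "offset where a branch grew" to "extra bytes inserted there".
uint32_t AdjustPosition(uint32_t old_position,
                        const std::map<uint32_t, uint32_t>& expansions) {
  uint32_t delta = 0;
  for (const auto& entry : expansions) {
    if (entry.first < old_position) {
      delta += entry.second;  // everything behind an expansion point moves forward
    }
  }
  return old_position + delta;
}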
+ if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -666,9 +692,19 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, gpr = destination.AsRegister<GpuRegister>(); } if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) { - __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant())); + int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant())); + int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst64(gpr, value); + } } if (dst_type == Primitive::kPrimFloat) { __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); @@ -734,12 +770,22 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, // Move to stack from constant HConstant* src_cst = source.GetConstant(); StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword; + GpuRegister gpr = ZERO; if (destination.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant())); + int32_t value = GetInt32ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant())); + DCHECK(destination.IsDoubleStackSlot()); + int64_t value = GetInt64ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } } - __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex()); + __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex()); } else { DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); @@ -755,9 +801,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } } -void CodeGeneratorMIPS64::SwapLocations(Location loc1, - Location loc2, - Primitive::Type type ATTRIBUTE_UNUSED) { +void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) { DCHECK(!loc1.IsConstant()); DCHECK(!loc2.IsConstant()); @@ -781,12 +825,16 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, // Swap 2 FPRs FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); - // TODO: Can MOV.S/MOV.D be used here to save one instruction? - // Need to distinguish float from double, right? 
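The zero-constant special cases above lean on the hard-wired MIPS ZERO register, which always reads as 0, so a zero value can be stored or moved to an FPU register without first materializing it; a trivial sketch of the register choice (the enum is illustrative):

#include <cstdint>

enum Register { ZERO, TMP };

Register SourceForConstant(int64_t value) {
  return value == 0 ? ZERO : TMP;  // only non-zero constants need a LoadConst into the scratch register
}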
- __ Dmfc1(TMP, r2); - __ Dmfc1(AT, r1); - __ Dmtc1(TMP, r1); - __ Dmtc1(AT, r2); + if (type == Primitive::kPrimFloat) { + __ MovS(FTMP, r1); + __ MovS(r1, r2); + __ MovS(r2, FTMP); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ MovD(FTMP, r1); + __ MovD(r1, r2); + __ MovD(r2, FTMP); + } } else if (is_slot1 != is_slot2) { // Swap GPR/FPR and stack slot Location reg_loc = is_slot1 ? loc2 : loc1; @@ -800,7 +848,6 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, reg_loc.AsFpuRegister<FpuRegister>(), SP, mem_loc.GetStackIndex()); - // TODO: review this MTC1/DMTC1 move if (mem_loc.IsStackSlot()) { __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); } else { @@ -845,12 +892,22 @@ void CodeGeneratorMIPS64::Move(HInstruction* instruction, } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); // Move to stack from constant + GpuRegister gpr = ZERO; if (location.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant())); - __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex()); + int32_t value = GetInt32ValueOf(instruction->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } + __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex()); } else { - __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue()); - __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex()); + DCHECK(location.IsDoubleStackSlot()); + int64_t value = instruction->AsLongConstant()->GetValue(); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } + __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex()); } } } else if (instruction->IsTemporary()) { @@ -908,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1017,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1048,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1198,7 +1256,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } default: @@ -1552,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; } @@ -1638,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. 
- if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -1707,7 +1761,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { switch (in_type) { case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1736,8 +1790,18 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { case Primitive::kPrimLong: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - // TODO: more efficient (direct) comparison with a constant + Location rhs_location = locations->InAt(1); + bool use_imm = rhs_location.IsConstant(); + GpuRegister rhs = ZERO; + if (use_imm) { + int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); + if (value != 0) { + rhs = AT; + __ LoadConst64(rhs, value); + } + } else { + rhs = rhs_location.AsRegister<GpuRegister>(); + } __ Slt(TMP, lhs, rhs); __ Slt(dst, rhs, lhs); __ Subu(dst, dst, TMP); @@ -1755,6 +1819,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { : QUICK_ENTRY_POINT(pCmplDouble); } codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + if (in_type == Primitive::kPrimFloat) { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); + } else { + CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); + } + } else { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); + } else { + CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); + } + } break; } @@ -1902,6 +1979,252 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { } } +void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + if (type == Primitive::kPrimInt) { + __ Subu(out, ZERO, dividend); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + __ Dsubu(out, ZERO, dividend); + } + } else if (out != dividend) { + __ Move(out, dividend); + } + } +} + +void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = 
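The Slt/Slt/Subu sequence emitted for a long HCompare above implements the usual three-way comparison identity, result = (rhs < lhs) - (lhs < rhs); a standalone check:

#include <cassert>
#include <cstdint>

int32_t CompareLong(int64_t lhs, int64_t rhs) {
  int32_t lt = (lhs < rhs) ? 1 : 0;  // Slt TMP, lhs, rhs
  int32_t gt = (rhs < lhs) ? 1 : 0;  // Slt dst, rhs, lhs
  return gt - lt;                    // Subu dst, dst, TMP  ->  -1, 0 or +1
}

int main() {
  assert(CompareLong(5, 9) == -1);
  assert(CompareLong(9, 9) == 0);
  assert(CompareLong(9, 5) == 1);
  return 0;
}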
locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Dsrl32(TMP, dividend, 31); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + } + __ Daddu(out, dividend, TMP); + if (ctz_imm < 32) { + __ Dsra(out, out, ctz_imm); + } else { + __ Dsra32(out, out, ctz_imm - 32); + } + if (imm < 0) { + __ Dsubu(out, ZERO, out); + } + } + } else { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + __ Subu(out, out, TMP); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Dsra32(TMP, dividend, 31); + __ Dsubu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Daddu(out, out, TMP); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + __ Daddu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + if (ctz_imm > 32) { + __ Dsll(out, out, 64 - ctz_imm); + __ Dsrl(out, out, 64 - ctz_imm); + } else { + __ Dsll32(out, out, 32 - ctz_imm); + __ Dsrl32(out, out, 32 - ctz_imm); + } + } + __ Dsubu(out, out, TMP); + } + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, + (type == Primitive::kPrimLong), + &magic, + &shift); + + if (type == Primitive::kPrimInt) { + __ LoadConst32(TMP, magic); + __ MuhR6(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Addu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Subu(TMP, TMP, dividend); + } + + if (shift != 0) { + __ Sra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Sra(out, TMP, 31); + __ Subu(out, TMP, out); + } else { + __ Sra(AT, TMP, 31); + __ Subu(AT, TMP, AT); + __ LoadConst32(TMP, 
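DivRemByPowerOfTwo above relies on the standard bias-then-shift trick: an arithmetic shift alone rounds toward negative infinity, so negative dividends first get a bias of (2^k - 1) added to make the result round toward zero, as Java division requires. A 32-bit standalone check (it assumes arithmetic right shifts, as the generated Sra does; the ctz == 1 fast path in the code computes the same bias with a single Srl):

#include <cassert>
#include <cstdint>

int32_t DivByPowerOfTwo(int32_t dividend, int ctz) {  // divisor = 1 << ctz, 1 <= ctz <= 30
  uint32_t sign = static_cast<uint32_t>(dividend >> 31);  // Sra: 0 or 0xFFFFFFFF
  uint32_t bias = sign >> (32 - ctz);                      // Srl: 0 or (1 << ctz) - 1
  return (dividend + static_cast<int32_t>(bias)) >> ctz;   // Addu + Sra
}

int main() {
  assert(DivByPowerOfTwo(7, 2) == 1);    //  7 / 4
  assert(DivByPowerOfTwo(-7, 2) == -1);  // -7 / 4 rounds toward zero
  assert(DivByPowerOfTwo(-8, 2) == -2);
  return 0;
}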
imm); + __ MulR6(TMP, AT, TMP); + __ Subu(out, dividend, TMP); + } + } else { + __ LoadConst64(TMP, magic); + __ Dmuh(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Daddu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Dsubu(TMP, TMP, dividend); + } + + if (shift >= 32) { + __ Dsra32(TMP, TMP, shift - 32); + } else if (shift > 0) { + __ Dsra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Dsra32(out, TMP, 31); + __ Dsubu(out, TMP, out); + } else { + __ Dsra32(AT, TMP, 31); + __ Dsubu(AT, TMP, AT); + __ LoadConst64(TMP, imm); + __ Dmul(TMP, AT, TMP); + __ Dsubu(out, dividend, TMP); + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + LocationSummary* locations = instruction->GetLocations(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister divisor = second.AsRegister<GpuRegister>(); + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) + __ DivR6(out, dividend, divisor); + else + __ Ddiv(out, dividend, divisor); + } else { + if (type == Primitive::kPrimInt) + __ ModR6(out, dividend, divisor); + else + __ Dmod(out, dividend, divisor); + } + } +} + void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); @@ -1909,7 +2232,7 @@ void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1931,16 +2254,9 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ DivR6(dst, lhs, rhs); - else - __ Ddiv(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -1984,7 +2300,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. 
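A standalone check of the multiply-high ("magic number") division that GenerateDivRemWithAnyConstant above emits, shown for the 32-bit case with the textbook constant for dividing by 3 (magic 0x55555556, shift 0); CalculateMagicAndShiftForDivRem is assumed to produce such pairs, and the right shifts below are taken to be arithmetic, as on the target:

#include <cassert>
#include <cstdint>

int32_t DivByConstViaMagic(int32_t dividend, int32_t magic, int shift, int32_t divisor) {
  // MuhR6: high 32 bits of the 64-bit signed product.
  int32_t q = static_cast<int32_t>((static_cast<int64_t>(dividend) * magic) >> 32);
  if (divisor > 0 && magic < 0) q += dividend;  // Addu fixup
  if (divisor < 0 && magic > 0) q -= dividend;  // Subu fixup
  q >>= shift;                                  // Sra
  return q - (q >> 31);                         // add 1 when the intermediate quotient is negative
}

int main() {
  assert(DivByConstViaMagic(7, 0x55555556, 0, 3) == 2);
  assert(DivByConstViaMagic(-7, 0x55555556, 0, 3) == -2);
  assert(DivByConstViaMagic(9999, 0x55555556, 0, 3) == 3333);
  return 0;
}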
We don't need to perform // any check, so simply fall through. @@ -2036,7 +2352,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2060,30 +2376,51 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, - Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - HCondition* condition = cond->AsCondition(); - - if (cond->IsIntConstant()) { - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ B(always_true_target); - } - return; + size_t condition_input_index, + Mips64Label* true_target, + Mips64Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against 1. + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ Bc(true_target); + } } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ Bc(false_target); + } } - } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = instruction->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); + if (true_target == nullptr) { + __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target); + } else { + __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target); + } } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. 
+ HCondition* condition = cond->AsCondition(); + GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = condition->GetLocations()->InAt(1); GpuRegister rhs_reg = ZERO; @@ -2095,37 +2432,46 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = rhs_location.AsRegister<GpuRegister>(); } - IfCondition if_cond = condition->GetCondition(); + IfCondition if_cond; + Mips64Label* non_fallthrough_target; + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + non_fallthrough_target = false_target; + } else { + if_cond = condition->GetCondition(); + non_fallthrough_target = true_target; + } + if (use_imm && rhs_imm == 0) { switch (if_cond) { case kCondEQ: - __ Beqzc(lhs, true_target); + __ Beqzc(lhs, non_fallthrough_target); break; case kCondNE: - __ Bnezc(lhs, true_target); + __ Bnezc(lhs, non_fallthrough_target); break; case kCondLT: - __ Bltzc(lhs, true_target); + __ Bltzc(lhs, non_fallthrough_target); break; case kCondGE: - __ Bgezc(lhs, true_target); + __ Bgezc(lhs, non_fallthrough_target); break; case kCondLE: - __ Blezc(lhs, true_target); + __ Blezc(lhs, non_fallthrough_target); break; case kCondGT: - __ Bgtzc(lhs, true_target); + __ Bgtzc(lhs, non_fallthrough_target); break; case kCondB: break; // always false case kCondBE: - __ Beqzc(lhs, true_target); // <= 0 if zero + __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero break; case kCondA: - __ Bnezc(lhs, true_target); // > 0 if non-zero + __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(true_target); // always true + __ Bc(non_fallthrough_target); // always true break; } } else { @@ -2133,96 +2479,69 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = TMP; __ LoadConst32(rhs_reg, rhs_imm); } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. 
- if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(true_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, true_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, true_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, true_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, true_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, true_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, true_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, true_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, true_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, true_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, true_target); - break; - } + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); + break; } } } - if (false_target != nullptr) { - __ B(false_target); + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { + __ Bc(false_target); } } void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } } @@ -2231,8 +2550,10 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS64(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, @@ -2387,7 +2708,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. @@ -2482,6 +2803,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2512,10 +2834,12 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // allocation of a register for the current method pointer like on x86 baseline. // TODO: remove this once all the issues with register saving/restoring are // sorted out. 
- LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + if (invoke->HasCurrentMethodInput()) { + LocationSummary* locations = invoke->GetLocations(); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); + } } } @@ -2572,7 +2896,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress()); @@ -2584,7 +2908,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); GpuRegister reg = temp.AsRegister<GpuRegister>(); GpuRegister method_reg; if (current_method.IsRegister()) { @@ -2614,13 +2938,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2637,6 +2962,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2659,29 +2985,38 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } +void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
+ InvokeDexCallingConvention calling_convention; + GpuRegister receiver = calling_convention.GetRegisterAt(0); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); - GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp = temp_location.AsRegister<GpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize); // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); - codegen_->MaybeRecordImplicitNullCheck(invoke); + __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset); + MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2691,7 +3026,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(A0)); + calling_convention.GetReturnLocation(cls->GetType())); } void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { @@ -2702,6 +3037,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -2713,22 +3049,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadDoubleword, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); // TODO: We will need a read barrier here. 
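The three dependent loads in GenerateVirtualCall above (receiver's class, vtable entry, entry point) modeled with plain structs; the field names and the vtable-as-array layout are simplifications for illustration, not ART's real object layout:

#include <cstddef>

struct ArtMethodModel { void (*entry_point)(); };
struct ClassModel     { ArtMethodModel* vtable[16]; };
struct ObjectModel    { ClassModel* klass; };

void DispatchVirtual(ObjectModel* receiver, size_t vtable_index) {
  ClassModel* klass = receiver->klass;                   // temp = object->GetClass() (implicit null check)
  ArtMethodModel* method = klass->vtable[vtable_index];  // temp = temp->GetMethodAt(method_offset)
  method->entry_point();                                 // T9 = entry point; Jalr T9 (+ Nop for the delay slot)
}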
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -2818,7 +3158,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderMIPS64::VisitMul(HMul* mul) { @@ -2952,15 +3296,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - // Move an uint16_t value to a register. 
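The slow-path policy both VisitLoadClass implementations now share, reduced to a boolean sketch (the parameters stand in for the HLoadClass queries used above):

bool LoadClassNeedsSlowPath(bool is_in_dex_cache, bool must_generate_clinit_check) {
  // Slow path when the type may still be missing from the dex cache (the null
  // check on the loaded entry is only emitted in that case) or when its
  // initialization must be checked.
  return !is_in_dex_cache || must_generate_clinit_check;
}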
- __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), @@ -3108,7 +3449,7 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -3128,26 +3469,23 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { Primitive::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ ModR6(dst, lhs, rhs); - else - __ Dmod(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: @@ -3457,6 +3795,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); @@ -3472,6 +3815,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + } + } else { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3623,7 +3979,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); for (int32_t i = 0; i < num_entries; i++) { int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); + Mips64Label* succ = codegen_->GetLabelOf(successors[i]); if (case_value == 0) { __ Beqzc(value_reg, succ); } else { @@ -3634,7 +3990,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins // And the default for any other value. 
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 9bbd02759a..85e3a4a3ce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -119,9 +119,12 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::RegisterLocation(V0); } - Location GetSetValueLocation( - Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { - return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1); + Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterLocation(A2) + : (is_instance + ? Location::RegisterLocation(A2) + : Location::RegisterLocation(A1)); } Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::FpuRegisterLocation(F0); @@ -155,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -227,9 +230,13 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, - Label* true_target, - Label* false_target, - Label* always_true_target); + size_t condition_input_index, + Mips64Label* true_target, + Mips64Label* false_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; @@ -258,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -291,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -333,10 +340,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { MethodReference target_method) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) 
OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED, - Location temp ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(FATAL); - } + void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -345,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. - Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. + Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index 921c1d86c2..644a3fb75e 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -15,6 +15,7 @@ */ #include "code_generator_utils.h" +#include "nodes.h" #include "base/logging.h" @@ -94,4 +95,8 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, *shift = is_long ? p - 64 : p - 32; } +bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) { + return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization(); +} + } // namespace art diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 59b495c2c9..7efed8c9ec 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,10 +21,17 @@ namespace art { +class HInstruction; + // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out // arguments `magic` and `shift` void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); +// Returns true if `cond_input` is expected to have a location. Assumes that +// `cond_input` is a conditional input of the currently emitted instruction and +// that it has been previously visited by the InstructionCodeGenerator. 
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0df7e3b30a..2fb87d3029 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -19,7 +19,6 @@ #include "art_method.h" #include "code_generator_utils.h" #include "compiled_method.h" -#include "constant_area_fixups_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -27,6 +26,7 @@ #include "intrinsics_x86.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "pc_relative_fixups_x86.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -35,6 +35,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; @@ -64,6 +67,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -90,6 +94,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -149,6 +154,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -174,6 +180,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -219,6 +226,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); @@ -254,6 +262,11 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -300,15 +313,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -329,12 +333,15 @@ class TypeCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -371,6 +378,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -413,6 +421,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -425,6 +434,221 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. 
+ Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86::X86Assembler::shll and + // art::x86::X86Assembler::AddImmediate below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ movl(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ shll(index_reg, Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(index_reg, Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
+    }
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on x86
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86); +}; + #undef __ #define __ down_cast<X86Assembler*>(GetAssembler())-> @@ -513,9 +737,9 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset, } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -533,6 +757,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, isa_features_(isa_features), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. 
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); @@ -581,7 +806,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const { @@ -782,7 +1007,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86::Move32(Location destination, Location source) { @@ -1157,26 +1382,19 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ j(final_condition, true_label); } -void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. - if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: @@ -1194,119 +1412,141 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected compare type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ jmp(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } +} + +static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { + // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS + // are set only strictly before `branch`. We can't use the eflags on long/FP + // conditions if they are materialized due to the complex branching. + return cond->IsCondition() && + cond->GetNext() == branch && + cond->InputAt(0)->GetType() != Primitive::kPrimLong && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
- int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ jmp(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ jmp(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ jmp(false_target); + } } - } else { - bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); - // Moves do not affect the eflags register, so if the condition is - // evaluated just before the if, we don't need to evaluate it - // again. We can't use the eflags on long/FP conditions if they are - // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) - && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type)); - if (is_materialized) { - if (!eflags_set) { - // Materialized condition, compare against 0. - Location lhs = instruction->GetLocations()->InAt(0); - if (lhs.IsRegister()) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); - } - __ j(kNotEqual, true_target); + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + if (AreEflagsSetFrom(cond, instruction)) { + if (true_target == nullptr) { + __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - - // Is this a long or FP comparison that has been folded into the HCondition? - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), - cond->AsCondition(), - true_target, - false_target, - always_true_target); - return; + // Materialized condition, compare against 0. + Location lhs = instruction->GetLocations()->InAt(condition_input_index); + if (lhs.IsRegister()) { + __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); + } else { + __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); + } + if (true_target == nullptr) { + __ j(kEqual, false_target); + } else { + __ j(kNotEqual, true_target); } + } + } else { + // Condition has not been materialized, use its inputs as the comparison and + // its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. 
+ Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } - Location lhs = cond->GetLocations()->InAt(0); - Location rhs = cond->GetLocations()->InAt(1); - // LHS is guaranteed to be in a register (see - // LocationsBuilderX86::VisitCondition). - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); - } else { - __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); - } + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); + } else if (rhs.IsConstant()) { + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { - __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); + __ cmpl(lhs.AsRegister<Register>(), Immediate(constant)); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + } else { + __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); + } + if (true_target == nullptr) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + } else { + __ j(X86Condition(condition->GetCondition()), true_target); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { __ jmp(false_target); } } void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
+ nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } @@ -1315,8 +1555,10 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathX86(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1677,19 +1919,27 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. + if (invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } + if (codegen_->IsBaseline()) { // Baseline does not have enough registers if the current method also // needs a register. We therefore do not require a register for it, and let // the code generation of the invoke handle it. LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -1719,6 +1969,11 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderX86 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } @@ -1738,6 +1993,9 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { + // This call to HandleInvoke allocates a temporary (core) register + // which is also used to transfer the hidden argument from FP to + // core register. HandleInvoke(invoke); // Add the hidden argument. invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); @@ -1745,31 +2003,42 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. + // Set the hidden argument. This is safe to do this here, as XMM7 + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(XMM7, hidden_reg); __ movl(temp, Immediate(invoke->GetDexMethodIndex())); - __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp); + __ movd(hidden_reg, temp); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(ESP, receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86WordSize).Int32Value())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -2207,6 +2476,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2215,6 +2485,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2995,7 +3266,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? 
EAX : EDX, out.AsRegister<Register>()); - if (instruction->InputAt(1)->IsIntConstant()) { + if (second.IsConstant()) { int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); if (imm == 0) { @@ -3045,11 +3316,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; } @@ -3516,19 +3789,18 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3545,13 +3817,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3760,16 +4032,6 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - if (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) { - // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod. 
- return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; - } switch (desired_dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: @@ -3786,6 +4048,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr } } +Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ movl(temp, Address(ESP, location.GetStackIndex())); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ movl(temp, Address(ESP, stack_offset)); + return temp; + } + return location.AsRegister<Register>(); +} + void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { @@ -3794,7 +4082,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); @@ -3804,13 +4092,18 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_patches_.emplace_back(invoke->GetTargetMethod()); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement this type. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + uint32_t offset = invoke->GetDexCacheArrayOffset(); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + // Add the patch entry and bind its label at the end of the instruction. 
+ pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, offset); + __ Bind(&pc_relative_dex_cache_patches_.back().label); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -3821,7 +4114,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); // temp = temp[index_in_cache] @@ -3862,13 +4155,24 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp Register temp = temp_in.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ movl(temp, Address(receiver, class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); @@ -3879,23 +4183,33 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); - linker_patches->reserve(method_patches_.size() + relative_call_patches_.size()); + size_t size = + method_patches_.size() + + relative_call_patches_.size() + + pc_relative_dex_cache_patches_.size(); + linker_patches->reserve(size); + // The label points to the end of the "movl" insn but the literal offset for method + // patch needs to point to the embedded constant which occupies the last 4 bytes. 
+ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; for (const MethodPatchInfo<Label>& info : method_patches_) { - // The label points to the end of the "movl" insn but the literal offset for method - // patch x86 needs to point to the embedded constant which occupies the last 4 bytes. - uint32_t literal_offset = info.label.Position() - 4; + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, info.target_method.dex_file, info.target_method.dex_method_index)); } for (const MethodPatchInfo<Label>& info : relative_call_patches_) { - // The label points to the end of the "call" insn but the literal offset for method - // patch x86 needs to point to the embedded constant which occupies the last 4 bytes. - uint32_t literal_offset = info.label.Position() - 4; + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, info.target_method.dex_file, info.target_method.dex_method_index)); } + for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, + &info.target_dex_file, + GetMethodAddressOffset(), + info.element_offset)); + } } void CodeGeneratorX86::MarkGCCard(Register temp, @@ -3920,18 +4234,29 @@ void CodeGeneratorX86::MarkGCCard(Register temp, void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + kEmitCompilerReadBarrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the object's location. - locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the object's location. Likewise, in the case of + // an object field get with read barriers enabled, we do not want + // the move to overwrite the object's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ? 
+ Location::kOutputOverlap : + Location::kNoOutputOverlap); } if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { @@ -3947,7 +4272,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -4022,7 +4348,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -4043,16 +4369,16 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // Ensure the value is in a byte register. locations->SetInAt(1, Location::RegisterLocation(EAX)); } else if (Primitive::IsFloatingPointType(field_type)) { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { + if (is_volatile && field_type == Primitive::kPrimDouble) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } + } else if (is_volatile && field_type == Primitive::kPrimLong) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. locations->SetInAt(1, Location::RequiresRegister()); - } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); - } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. // We need two XMM registers because there's no easier way to (bit) copy a register pair // into a single XMM register (we copy each pair part into the XMMs and then interleave them). @@ -4060,6 +4386,15 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // isolated cases when we need this it isn't worth adding the extra complexity. locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. + // Ensure the card is in a byte register. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -4081,6 +4416,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { @@ -4090,7 +4427,12 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { - __ movw(Address(base, offset), value.AsRegister<Register>()); + if (value.IsConstant()) { + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movw(Address(base, offset), value.AsRegister<Register>()); + } break; } @@ -4105,6 +4447,9 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); __ movl(Address(base, offset), temp); + } else if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); } else { __ movl(Address(base, offset), value.AsRegister<Register>()); } @@ -4120,21 +4465,40 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ punpckldq(temp1, temp2); __ movsd(Address(base, offset), temp1); codegen_->MaybeRecordImplicitNullCheck(instruction); + } else if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); } else { __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); } + maybe_record_implicit_null_check_done = true; break; } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -4143,8 +4507,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - // Longs are handled in the switch. 
- if (field_type != Primitive::kPrimLong) { + if (!maybe_record_implicit_null_check_done) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4313,24 +4676,35 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - // The output overlaps in case of long: we don't want the low move to overwrite - // the array's location. - locations->SetOut(Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move + // to overwrite the array's location. Likewise, in the case of an + // object array get with read barriers enabled, we do not want the + // move to overwrite the array's location, as we need it to emit + // the read barrier. + locations->SetOut( + Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ? + Location::kOutputOverlap : + Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4385,6 +4759,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4449,8 +4826,17 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4461,14 +4847,18 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // 
optimization. Primitive::Type value_type = instruction->GetComponentType(); + bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); bool is_byte_type = (value_type == Primitive::kPrimBoolean) || (value_type == Primitive::kPrimByte); @@ -4481,7 +4871,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register. locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else if (Primitive::IsFloatingPointType(value_type)) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -4495,14 +4885,15 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4542,6 +4933,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. 
DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4549,7 +4941,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4558,7 +4950,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; Register temp = locations->GetTemp(0).AsRegister<Register>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4570,22 +4962,62 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { __ Bind(¬_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison, we're comparing against null.. - __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. 
+ __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4596,7 +5028,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4611,6 +5043,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -4667,8 +5100,14 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4677,8 +5116,19 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<Register>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + Address address_hi = index.IsConstant() ? 
+ Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + kX86WordSize) : + Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + __ movl(address, Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(address_hi, Immediate(High32Bits(v))); + } break; } @@ -5064,7 +5514,8 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(EAX)); + Location::RegisterLocation(EAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { @@ -5075,31 +5526,60 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); - __ movl(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. 
- - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movl(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5143,12 +5623,35 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leal(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
+ + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leal(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -5188,44 +5691,49 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(EAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5240,15 +5748,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -5265,13 +5767,23 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5288,6 +5800,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -5299,8 +5812,17 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5312,6 +5834,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -5322,9 +5845,18 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5335,6 +5867,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<Register>()); @@ -5343,8 +5876,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5353,13 +5886,25 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. 
by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5384,75 +5929,73 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - break; - case TypeCheckKind::kInterfaceCheck: - case TypeCheckKind::kUnresolvedCheck: - call_kind = LocationSummary::kCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86 uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); - Register temp = locations->WillCall() - ? 
kNoRegister - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); - NearLabel done, abstract_entry; + NearLabel done; // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5463,19 +6006,44 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop, success; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. __ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5485,6 +6053,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5496,16 +6065,39 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. - __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); } else { @@ -5513,29 +6105,67 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. 
the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. __ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5551,6 +6181,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -5686,6 +6321,82 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ac3d06c23d..064051c7f4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -227,14 +227,12 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); - void GenerateCompareTestAndBranch(HIf* if_inst, - HCondition* condition, + Label* false_target); + void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); @@ -397,7 +395,64 @@ class CodeGeneratorX86 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. 
+ // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + + struct PcRelativeDexCacheAccessInfo { + PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) + : target_dex_file(dex_file), element_offset(element_off), label() { } + + const DexFile& target_dex_file; + uint32_t element_offset; + // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset. + Label label; + }; + // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. Label frame_entry_label_; @@ -410,6 +465,8 @@ class CodeGeneratorX86 : public CodeGenerator { // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). ArenaDeque<MethodPatchInfo<Label>> method_patches_; ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + // PC-relative DexCache access info. + ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5218d70995..4618be9cc3 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; @@ -52,16 +55,17 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -78,16 +82,17 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. 
SaveLiveRegisters(codegen, instruction_->GetLocations()); } - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -139,18 +144,19 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : instruction_(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), - instruction_, - instruction_->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { - __ jmp(x64_codegen->GetLabelOf(successor_)); + __ jmp(x86_64_codegen->GetLabelOf(successor_)); } } @@ -180,7 +186,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. @@ -196,8 +202,11 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), - instruction_, instruction_->GetDexPc(), this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -222,22 +231,30 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = at_->GetLocations(); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) - : QUICK_ENTRY_POINT(pInitializeType), - at_, dex_pc_, this); + x86_64_codegen->InvokeRuntime(do_clinit_ ? + QUICK_ENTRY_POINT(pInitializeStaticStorage) : + QUICK_ENTRY_POINT(pInitializeType), + at_, + dex_pc_, + this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } Location out = locations->Out(); // Move the class to the desired location. 
if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - x64_codegen->Move(out, Location::RegisterLocation(RAX)); + x86_64_codegen->Move(out, Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -271,18 +288,19 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), - instruction_, - instruction_->GetDexPc(), - this); - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -308,18 +326,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -336,21 +345,24 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), + instruction_, + dex_pc, + this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction_, - dex_pc, - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), + instruction_, + dex_pc, + this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { if (instruction_->IsInstanceOf()) { - x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); @@ -375,15 +387,16 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); DCHECK(instruction_->IsDeoptimize()); HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), - this); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + deoptimize, + deoptimize->GetDexPc(), + this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -421,11 +434,12 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); - CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction_, - instruction_->GetDexPc(), - this); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -438,6 +452,219 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; +// Slow path generating a read barrier for a heap reference. 
+class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial + // object has been overwritten by (or after) the heap object + // reference load to be instrumented, e.g.: + // + // __ movl(out, Address(out, offset)); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; +} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister reg_out = out_.AsRegister<CpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute real offset and store it in index_. + Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::x86_64::X86_64Assembler::shll and + // art::x86_64::X86_64Assembler::AddImmediate below), but it + // has not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. 
+ Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister(); + __ movl(CpuRegister(free_reg), CpuRegister(index_reg)); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the + // scale factor (2) cannot overflow in practice, as the + // runtime is unable to allocate object arrays with a size + // larger than 2^26 - 1 (that is, 2^28 - 4 bytes). + __ shll(CpuRegister(index_reg), Immediate(TIMES_4)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegister()); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); + } + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierForHeapReferenceSlowPathX86_64"; + } + + private: + CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister()); + size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<CpuRegister>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on x86-64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). 
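// --- Illustrative sketch (annotation, not part of the patch) ----------------
// FindAvailableCallerSaveRegister() above scans the core registers for a
// caller-save register distinct from `ref` and `obj`, so the slow path can
// stash the index value it is about to clobber. A standalone version of the
// same selection loop follows; the callee-save mask and register numbering
// are illustrative assumptions, not taken from ART.
#include <cassert>
#include <cstdint>

// Hypothetical x86-64 SysV callee-save set (bit i set <=> register i is
// callee-save): RBX(3), RBP(5), R12(12) .. R15(15).
constexpr uint32_t kCalleeSaveMask =
    (1u << 3) | (1u << 5) | (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);
constexpr int kNumberOfCoreRegisters = 16;

int FindAvailableCallerSaveRegister(int ref, int obj) {
  for (int i = 0; i < kNumberOfCoreRegisters; ++i) {
    const bool is_callee_save = (kCalleeSaveMask & (1u << i)) != 0;
    if (i != ref && i != obj && !is_callee_save) {
      return i;  // First caller-save register that is neither ref nor obj.
    }
  }
  // With more than two caller-save core registers this is unreachable.
  assert(false && "no free caller-save register");
  return -1;
}
// ----------------------------------------------------------------------------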
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); +}; + #undef __ #define __ down_cast<X86_64Assembler*>(GetAssembler())-> @@ -503,7 +730,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo Address::Absolute(invoke->GetStringInitOffset(), true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress())); @@ -514,15 +741,15 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetDexCacheArrayOffset()); + pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetDexCacheArrayOffset()); __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, false /* no_rip */)); // Bind the label at the end of the "movl" insn. 
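// --- Illustrative sketch (annotation, not part of the patch) ----------------
// For kDexCachePcRelative above, the generator emits a movq with a dummy
// 32-bit displacement and binds a label at the end of the instruction; the
// linker later rewrites the last four bytes. The model below mimics that
// emit-placeholder / record-position / patch-later flow with a hypothetical
// byte buffer (CodeBuffer and PatchInfo are invented names).
#include <cstdint>
#include <cstring>
#include <vector>

struct PatchInfo {
  size_t label_position;  // Offset just past the emitted instruction.
};

class CodeBuffer {
 public:
  void EmitPlaceholderImm32() {
    const uint32_t dummy = 0;  // Stand-in for kDummy32BitOffset.
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(&dummy);
    bytes_.insert(bytes_.end(), bytes, bytes + sizeof(dummy));
  }
  size_t Position() const { return bytes_.size(); }
  void PatchImm32(size_t literal_offset, uint32_t value) {
    std::memcpy(&bytes_[literal_offset], &value, sizeof(value));
  }

 private:
  std::vector<uint8_t> bytes_;
};

void RecordAndPatchExample() {
  CodeBuffer buffer;
  std::vector<PatchInfo> patches;
  buffer.EmitPlaceholderImm32();            // Instruction with dummy constant.
  patches.push_back({buffer.Position()});   // "Bind the label" after the insn.
  // At link time the literal occupies the last 4 bytes of the instruction,
  // hence the `label.Position() - 4` adjustment used by EmitLinkerPatches.
  buffer.PatchImm32(patches[0].label_position - 4u, 0x12345678u);
}
// ----------------------------------------------------------------------------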
- __ Bind(&pc_rel_dex_cache_patches_.back().label); + __ Bind(&pc_relative_dex_cache_patches_.back().label); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; CpuRegister reg = temp.AsRegister<CpuRegister>(); if (current_method.IsRegister()) { @@ -533,7 +760,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo method_reg = reg.AsRegister(); __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } - // temp = temp->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movq(reg, Address(CpuRegister(method_reg), ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); @@ -575,13 +802,25 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t CpuRegister temp = temp_in.AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); + size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ movl(temp, Address(CpuRegister(receiver), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); @@ -593,28 +832,27 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + relative_call_patches_.size() + pc_rel_dex_cache_patches_.size(); + method_patches_.size() + + relative_call_patches_.size() + + pc_relative_dex_cache_patches_.size(); linker_patches->reserve(size); + // The label points to the end of the "movl" insn but the literal offset for method + // patch needs to point to the embedded constant which occupies the last 4 bytes. 
+ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; for (const MethodPatchInfo<Label>& info : method_patches_) { - // The label points to the end of the "movl" instruction but the literal offset for method - // patch x86 needs to point to the embedded constant which occupies the last 4 bytes. - uint32_t literal_offset = info.label.Position() - 4; + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, info.target_method.dex_file, info.target_method.dex_method_index)); } for (const MethodPatchInfo<Label>& info : relative_call_patches_) { - // The label points to the end of the "call" instruction but the literal offset for method - // patch x86 needs to point to the embedded constant which occupies the last 4 bytes. - uint32_t literal_offset = info.label.Position() - 4; + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, info.target_method.dex_file, info.target_method.dex_method_index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) { - // The label points to the end of the "mov" instruction but the literal offset for method - // patch x86 needs to point to the embedded constant which occupies the last 4 bytes. - uint32_t literal_offset = info.label.Position() - 4; + for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, &info.target_dex_file, info.label.Position(), @@ -673,9 +911,9 @@ static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -695,7 +933,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, constant_area_start_(0), method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -729,7 +967,7 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { @@ -1083,26 +1321,19 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, __ j(X86_64FPCondition(cond->GetCondition()), true_label); } -void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, - HCondition* condition, - Label* true_target, - Label* false_target, - Label* always_true_target) { +void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, + Label* true_target_in, + Label* false_target_in) { + // Generated branching requires both targets to be explicit. If either of the + // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. + Label fallthrough_target; + Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); - // We don't want true_target as a nullptr. - if (true_target == nullptr) { - true_target = always_true_target; - } - bool falls_through = (false_target == nullptr); - - // FP compares don't like null false_targets. - if (false_target == nullptr) { - false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - } - Primitive::Type type = condition->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -1161,117 +1392,140 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr, LOG(FATAL) << "Unexpected condition type " << type; } - if (!falls_through) { + if (false_target != &fallthrough_target) { __ jmp(false_target); } + + if (fallthrough_target.IsLinked()) { + __ Bind(&fallthrough_target); + } +} + +static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { + // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS + // are set only strictly before `branch`. We can't use the eflags on long + // conditions if they are materialized due to the complex branching. 
+ return cond->IsCondition() && + cond->GetNext() == branch && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target) { - HInstruction* cond = instruction->InputAt(0); - if (cond->IsIntConstant()) { + Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. - int32_t cond_value = cond->AsIntConstant()->GetValue(); - if (cond_value == 1) { - if (always_true_target != nullptr) { - __ jmp(always_true_target); + if (cond->AsIntConstant()->IsOne()) { + if (true_target != nullptr) { + __ jmp(true_target); } - return; } else { - DCHECK_EQ(cond_value, 0); + DCHECK(cond->AsIntConstant()->IsZero()); + if (false_target != nullptr) { + __ jmp(false_target); + } } - } else { - bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); - // Moves do not affect the eflags register, so if the condition is - // evaluated just before the if, we don't need to evaluate it - // again. We can't use the eflags on FP conditions if they are - // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) - && !Primitive::IsFloatingPointType(type); - - if (is_materialized) { - if (!eflags_set) { - // Materialized condition, compare against 0. - Location lhs = instruction->GetLocations()->InAt(0); - if (lhs.IsRegister()) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), - Immediate(0)); - } - __ j(kNotEqual, true_target); + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + if (AreEflagsSetFrom(cond, instruction)) { + if (true_target == nullptr) { + __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); } } else { - // Condition has not been materialized, use its inputs as the - // comparison and its condition as the branch condition. - - // Is this a long or FP comparison that has been folded into the HCondition? - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { - // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), - true_target, false_target, always_true_target); - return; + // Materialized condition, compare against 0. 
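// --- Illustrative sketch (annotation, not part of the patch) ----------------
// GenerateTestAndBranch now takes nullable targets: with only a false target
// it branches on the opposite condition, with a true target it branches on
// the condition itself, and when both are explicit it adds a trailing jmp to
// the false target. The standalone model below (invented condition/emit
// helpers) shows that selection logic in isolation.
#include <cstdio>

enum SimpleCondition { kCondEqual, kCondNotEqual };

SimpleCondition OppositeOf(SimpleCondition cond) {
  return cond == kCondEqual ? kCondNotEqual : kCondEqual;
}

const char* Mnemonic(SimpleCondition cond) {
  return cond == kCondEqual ? "je" : "jne";
}

// Emits (prints) the branches for one materialized boolean test. A null
// target means "falls through to the next block".
void EmitTestAndBranch(SimpleCondition cond,
                       const char* true_target,
                       const char* false_target) {
  if (true_target == nullptr && false_target == nullptr) {
    return;  // Both successors fall through: nothing to emit.
  }
  if (true_target == nullptr) {
    // Case (1): only the false edge needs a branch; invert the condition.
    std::printf("%s %s\n", Mnemonic(OppositeOf(cond)), false_target);
    return;
  }
  // Cases (2) and (3): branch on the condition to the true target.
  std::printf("%s %s\n", Mnemonic(cond), true_target);
  if (false_target != nullptr) {
    // Case (3): neither successor falls through, so jump to the false target.
    std::printf("jmp %s\n", false_target);
  }
}
// ----------------------------------------------------------------------------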
+ Location lhs = instruction->GetLocations()->InAt(condition_input_index); + if (lhs.IsRegister()) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); + } + if (true_target == nullptr) { + __ j(kEqual, false_target); + } else { + __ j(kNotEqual, true_target); } + } + } else { + // Condition has not been materialized, use its inputs as the + // comparison and its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + + // If this is a long or FP comparison that has been folded into + // the HCondition, generate the comparison directly. + Primitive::Type type = condition->InputAt(0)->GetType(); + if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + GenerateCompareTestAndBranch(condition, true_target, false_target); + return; + } - Location lhs = cond->GetLocations()->InAt(0); - Location rhs = cond->GetLocations()->InAt(1); - if (rhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); - } else if (rhs.IsConstant()) { - int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); - if (constant == 0) { - __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); - } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); - } + Location lhs = condition->GetLocations()->InAt(0); + Location rhs = condition->GetLocations()->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), rhs.GetStackIndex())); + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); } - __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), rhs.GetStackIndex())); + } + if (true_target == nullptr) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + } else { + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } - if (false_target != nullptr) { + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { __ jmp(false_target); } } void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - Label* always_true_target = true_target; - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - always_true_target = nullptr; - } - if (codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - false_target = nullptr; - } - GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); + HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); + HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); + Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + nullptr : codegen_->GetLabelOf(true_successor); + Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - HInstruction* cond = deoptimize->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } } @@ -1280,8 +1534,10 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathX86_64(deoptimize); codegen_->AddSlowPath(slow_path); - Label* slow_path_entry = slow_path->GetEntryLabel(); - GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); + GenerateTestAndBranch(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); } void LocationsBuilderX86_64::VisitLocal(HLocal* local) { @@ -1819,7 +2075,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { @@ -1890,7 +2146,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1903,31 +2158,41 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
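// --- Illustrative sketch (annotation, not part of the patch) ----------------
// The interface-invoke code below loads the receiver's class, indexes the
// embedded IMT with `imt_index % kImtSize`, and passes the dex method index
// in a hidden register (RAX) so a conflict stub can disambiguate collisions.
// A simplified data-structure view of that dispatch, with invented stub
// types and an assumed table size, is sketched here.
#include <cstddef>
#include <cstdint>

constexpr size_t kImtSize = 64;  // Assumed size, for illustration only.

struct MethodStub {
  void (*entry_point)(uint32_t hidden_dex_method_index);
};

struct ClassStub {
  MethodStub* imt[kImtSize];  // Interface method table embedded in the class.
};

struct ObjectStub {
  ClassStub* klass;
};

void InvokeInterfaceExample(ObjectStub* receiver,
                            uint32_t imt_index,
                            uint32_t dex_method_index) {
  // temp = receiver->klass_; temp = temp->imt_[imt_index % kImtSize];
  MethodStub* target = receiver->klass->imt[imt_index % kImtSize];
  // The callee (or a conflict-resolution stub) receives the dex method index
  // through the hidden argument, mirroring Load64BitValue(hidden_reg, ...).
  target->entry_point(dex_method_index);
}
// ----------------------------------------------------------------------------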
- CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); + LocationSummary* locations = invoke->GetLocations(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - // Set the hidden argument. - CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + // Set the hidden argument. This is safe to do this here, as RAX + // won't be modified thereafter, before the `call` instruction. + DCHECK_EQ(RAX, hidden_reg.AsRegister()); codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); + // /* HeapReference<Class> */ temp = temp->klass_ __ movl(temp, Address(temp, class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -2562,7 +2827,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); // We can use a leaq or addq if the constant can fit in an immediate. 
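// --- Illustrative sketch (annotation, not part of the patch) ----------------
// The add/sub hunks above switch to Location::RegisterOrInt32Constant, i.e. a
// 64-bit constant may be encoded as an immediate only when it fits in a
// sign-extended 32 bits. A small re-implementation of that fit test (not
// ART's IsInt utility, just an equivalent check) is shown below.
#include <cstdint>

// True if `value` is representable as a signed 32-bit immediate, which the
// addq/subq/leaq forms sign-extend to 64 bits.
constexpr bool FitsInInt32Immediate(int64_t value) {
  return value >= INT32_MIN && value <= INT32_MAX;
}

static_assert(FitsInInt32Immediate(0x7fffffff), "max int32 fits");
static_assert(!FitsInInt32Immediate(0x80000000LL), "2^31 does not fit");
static_assert(!FitsInInt32Immediate(-0x80000001LL), "below int32 min");
// ----------------------------------------------------------------------------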
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2682,7 +2947,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -3517,22 +3782,19 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), - instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3551,13 +3813,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3669,13 +3931,23 @@ void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the move to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -3684,7 +3956,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); + Location base_loc = locations->InAt(0); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); Primitive::Type field_type = field_info.GetFieldType(); @@ -3744,7 +4017,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -3755,14 +4028,25 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Primitive::Type field_type = field_info.GetFieldType(); + bool is_volatile = field_info.IsVolatile(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } } else { - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + } } if (needs_write_barrier) { // Temporary registers for the write barrier. 
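// --- Illustrative sketch (annotation, not part of the patch) ----------------
// The field-set hunks around this point restrict volatile stores to operands
// that can be written with one instruction (a register, or a constant that
// fits a 32-bit immediate), while non-volatile 64-bit constants that do not
// fit are split into two 32-bit stores via MoveInt64ToAddress. The helper
// below models that choice in plain C++ (FakeMemory and the little-endian
// split are assumptions of the sketch).
#include <cstdint>
#include <cstring>

struct FakeMemory {
  uint8_t bytes[8];
};

bool StoreInt64(FakeMemory* target, int64_t value, bool must_be_single_store) {
  const bool fits_imm32 = value >= INT32_MIN && value <= INT32_MAX;
  if (fits_imm32) {
    // Single store, like movq with a sign-extended 32-bit immediate.
    std::memcpy(target->bytes, &value, sizeof(value));
    return true;
  }
  if (must_be_single_store) {
    return false;  // A volatile store cannot be split; use a register instead.
  }
  // Two 32-bit stores for the low and high halves (x86-64 little-endian).
  const uint32_t low = static_cast<uint32_t>(value);
  const uint32_t high = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
  std::memcpy(target->bytes, &low, sizeof(low));
  std::memcpy(target->bytes + 4, &high, sizeof(high));
  return true;
}
// ----------------------------------------------------------------------------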
@@ -3790,11 +4074,13 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movb(Address(base, offset), Immediate(v)); } else { __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3805,7 +4091,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movw(Address(base, offset), Immediate(v)); } else { __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3838,9 +4124,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimLong: { if (value.IsConstant()) { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(Address(base, offset), Immediate(v_32)); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; } else { __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); } @@ -3848,12 +4136,28 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -3862,7 +4166,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (!maybe_record_implicit_null_check_done) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); @@ -4029,20 +4335,31 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object array get when read barriers + // are enabled: we do not want the move to overwrite the array's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -4097,8 +4414,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { @@ -4153,8 +4471,17 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4164,37 +4491,41 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call || object_array_set_with_read_barrier) ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); - } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } if (needs_write_barrier) { // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + + // This first temporary register is possibly used for heap + // reference poisoning and/or read barrier emission too. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>(); + Location array_loc = locations->InAt(0); + CpuRegister array = array_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4238,6 +4569,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + if (!value.IsRegister()) { // Just setting null. DCHECK(instruction->InputAt(2)->IsNullConstant()); @@ -4266,22 +4598,62 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ Bind(¬_null); } - __ movl(temp, Address(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, component_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ cmpl(temp, Address(register_value, class_offset)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - __ j(kEqual, &do_put); - __ MaybeUnpoisonHeapReference(temp); - __ movl(temp, Address(temp, super_offset)); - // No need to unpoison the result, we're comparing against null. 
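// --- Illustrative sketch (annotation, not part of the patch) ----------------
// The aput-object code around this point performs an inline assignability
// check: compare the array's component type with the value's class, and for a
// statically-typed Object[] also accept a component type whose super class is
// null (i.e. java.lang.Object). The pure-logic equivalent of that fast path,
// with invented ClassModel/ObjectModel structs, is sketched below; any other
// outcome falls back to the pAputObject slow path.
struct ClassModel {
  const ClassModel* component_type;  // Non-null for array classes.
  const ClassModel* super_class;     // Null only for java.lang.Object.
};

struct ObjectModel {
  const ClassModel* klass;
};

bool ArrayStoreFastPathAccepts(const ObjectModel* array,
                               const ObjectModel* value,
                               bool static_type_is_object_array) {
  const ClassModel* component = array->klass->component_type;
  if (component == value->klass) {
    return true;  // Exact match: store without calling the runtime.
  }
  if (static_type_is_object_array && component->super_class == nullptr) {
    return true;  // Component type is Object: any reference is assignable.
  }
  return false;   // Defer to the slow path.
}
// ----------------------------------------------------------------------------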
- __ testl(temp, temp); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ movl(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ movl(temp, Address(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = register_value->klass_ + // __ movl(temp2, Address(register_value, class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); + // + // __ cmpl(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ jmp(slow_path->GetEntryLabel()); } else { - __ j(kNotEqual, slow_path->GetEntryLabel()); + // /* HeapReference<Class> */ temp = array->klass_ + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ movl(temp, Address(temp, component_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor the object reference in `register_value->klass`, as + // we are comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ movl(temp, Address(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); + } } } @@ -4307,6 +4679,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } + case Primitive::kPrimInt: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = index.IsConstant() @@ -4330,13 +4703,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); if (value.IsRegister()) { __ movq(address, value.AsRegister<CpuRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(address, Immediate(v_32)); + Address address_high = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); } - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4345,8 +4720,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4356,9 +4737,18 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + Address address_high = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); + } break; } @@ -4739,7 +5129,8 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(RAX)); + Location::RegisterLocation(RAX), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { @@ -4750,31 +5141,58 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ 
movl(out, Address(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); - __ movq(out, Address( - current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. - - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ movq(out, Address(current_method, + ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ movl(out, Address(out, cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -4809,12 +5227,35 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); - __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
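// --- Illustrative sketch (annotation, not part of the patch) ----------------
// The LoadClass/LoadString hunks above and below replace the old
// "TODO: We will need a read barrier here" direct root loads: without read
// barriers the compiled code still loads the GcRoot's referent (movl), but
// with read barriers it materializes the address of the root (leaq) and lets
// a slow-path helper return the possibly-forwarded reference. The sketch
// below captures that pattern; GcRootModel and the helper are invented names.
template <typename MirrorType>
struct GcRootModel {
  MirrorType* reference;  // Root slot, simplified to a raw pointer.
};

// Stand-in for the helper reached via pReadBarrierForRootSlow; a real
// collector could return a to-space copy instead of the original reference.
template <typename MirrorType>
MirrorType* ReadBarrierForRoot(GcRootModel<MirrorType>* root) {
  return root->reference;
}

template <typename MirrorType>
MirrorType* LoadGcRoot(GcRootModel<MirrorType>* root, bool emit_read_barrier) {
  if (emit_read_barrier) {
    // leaq out, [root]; out = out->Read()  -- pass the root's address along.
    return ReadBarrierForRoot(root);
  }
  // movl out, [root]  -- plain load of the reference.
  return root->reference;
}
// ----------------------------------------------------------------------------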
+ + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ leaq(out, Address(current_method, declaring_class_offset)); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ movl(out, Address(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ leaq(out, Address(out, cache_offset)); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ movl(out, Address(out, cache_offset)); + } + __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -4854,44 +5295,49 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too. - locations->SetOut(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(RAX)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "out" register too. + locations->SetOut(Location::RequiresRegister()); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4906,15 +5352,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ movl(target, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ movl(out, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4936,13 +5376,23 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -4959,6 +5409,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop, success; @@ -4970,8 +5421,17 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &success); + Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ movl(out, Address(out, super_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -4983,6 +5443,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. NearLabel exact_check; @@ -4993,9 +5454,18 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + __ movl(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ movl(out, Address(out, component_offset)); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5006,6 +5476,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ jmp(&done); break; } + case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); @@ -5014,8 +5485,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -5024,13 +5495,25 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. 
by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ jmp(&done); } @@ -5055,58 +5538,60 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - // Note that TypeCheckSlowPathX86_64 uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + Location obj_loc = locations->InAt(0); + CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister temp = locations->WillCall() - ? 
CpuRegister(kNoRegister) - : locations->GetTemp(0).AsRegister<CpuRegister>(); - + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); NearLabel done; // Avoid null check if we know obj is not null. @@ -5115,15 +5600,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); } - if (locations->WillCall()) { - __ movl(obj, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ movl(temp, Address(obj, class_offset)); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { if (cls.IsRegister()) { @@ -5134,19 +5615,44 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // Jump to slow path for throwing the exception or doing a // more involved array check. - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - NearLabel loop; + NearLabel loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. __ testl(temp, temp); - // Jump to the slow path to throw the exception. - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5156,6 +5662,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. NearLabel loop; @@ -5167,16 +5674,39 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ movl(temp, Address(temp, super_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ testl(temp, temp); __ j(kNotEqual, &loop); - // Jump to the slow path to throw the exception. - __ jmp(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + NearLabel check_non_primitive_component_type; if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { @@ -5184,29 +5714,67 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); + __ movl(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ movl(temp, Address(temp, component_offset)); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. 
the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. __ testl(temp, temp); - __ j(kEqual, slow_path->GetEntryLabel()); + __ j(kNotEqual, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ j(kEqual, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ movl(temp, Address(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ jmp(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ jmp(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5222,6 +5790,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -5339,6 +5912,82 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
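// [Illustrative sketch, not part of this change.] With no fast path yet, the
// code emitted below boils down to "call the runtime and overwrite `out` with
// whatever it returns", matching the artReadBarrierSlow contract documented in
// the header. A minimal stand-alone model of that contract (plain C++;
// `Object` and the function-pointer type are made-up stand-ins, not ART
// declarations):
#include <cstdint>

namespace read_barrier_slow_sketch {

struct Object {};

// Models: mirror::Object* artReadBarrierSlow(mirror::Object* ref,
//                                            mirror::Object* obj,
//                                            uint32_t offset);
using ReadBarrierSlowFn = Object* (*)(Object* ref, Object* obj, uint32_t offset);

inline Object* HeapReferenceLoadWithBarrier(Object** field_addr,
                                            Object* holder,
                                            uint32_t offset,
                                            ReadBarrierSlowFn slow_path) {
  Object* ref = *field_addr;              // original load, already emitted by the caller
  return slow_path(ref, holder, offset);  // unconditional slow path; fast path is still a TODO
}

}  // namespace read_barrier_slow_sketch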
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); + } +} + +void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; @@ -5564,6 +6213,24 @@ Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { return Address::RIP(table_fixup); } +void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction) { + if (IsInt<32>(v)) { + int32_t v_32 = v; + __ movq(addr_low, Immediate(v_32)); + MaybeRecordImplicitNullCheck(instruction); + } else { + // Didn't fit in a register. Do it in pieces. 
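// [Illustrative sketch, not part of this change.] The split below mirrors what
// Low32Bits()/High32Bits() compute: the value is written as two 32-bit halves
// because the x86-64 move-to-memory forms only accept a (sign-extended) 32-bit
// immediate, never a full imm64. Stand-alone model in plain C++ (no assembler
// involved; names are made up for the example):
#include <cstdint>
#include <cstring>

namespace int64_split_sketch {

inline void StoreInt64AsTwoHalves(void* addr_low, void* addr_high, int64_t v) {
  const uint32_t low = static_cast<uint32_t>(v);         // Low32Bits(v)
  const uint32_t high = static_cast<uint32_t>(v >> 32);  // High32Bits(v)
  std::memcpy(addr_low, &low, sizeof(low));    // movl [addr_low], imm32
  std::memcpy(addr_high, &high, sizeof(high)); // movl [addr_high], imm32
}

}  // namespace int64_split_sketch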
+ int32_t low_v = Low32Bits(v); + int32_t high_v = High32Bits(v); + __ movl(addr_low, Immediate(low_v)); + MaybeRecordImplicitNullCheck(instruction); + __ movl(addr_high, Immediate(high_v)); + } +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index fc485f5bb6..145b1f33b4 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -217,14 +217,12 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, Label* true_target, - Label* false_target, - Label* always_true_target); - void GenerateCompareTestAndBranch(HIf* if_inst, - HCondition* condition, + Label* false_target); + void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, - Label* false_target, - Label* always_true_target); + Label* false_target); void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); @@ -352,6 +350,51 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` provided (i.e., when it is different from + // Location::NoLocation()), the offset value passed to + // artReadBarrierSlow is adjusted to take `index` into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + int ConstantAreaStart() const { return constant_area_start_; } @@ -368,6 +411,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + // Assign a 64 bit constant to an address. 
+ void MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction); + private: struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) @@ -395,7 +444,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<MethodPatchInfo<Label>> method_patches_; ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_; + ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; // When we don't know the proper offset for the value, we use kDummy32BitOffset. // We will fix this up in the linker later to have the right value. diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e1a8c9cc0f..af8b8b562a 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "code_generator.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -255,6 +256,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } +static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kASR: return vixl::ASR; + case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL; + case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_SHIFT; + } +} + +static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB; + case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH; + case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW; + case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB; + case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH; + case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_EXTEND; + } +} + +static inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. + return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +static inline bool HasShifterOperand(HInstruction* instr) { + // `neg` instructions are an alias of `sub` using the zero register as the + // first register input. 
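// [Illustrative sketch, not part of this change.] The point of recognizing a
// shifter operand is that AArch64 data-processing instructions can fold a
// shift (or extension) of the second register operand into the instruction
// itself. For example, a compiler targeting AArch64 can typically lower the
// function below to a single `add x0, x0, x1, lsl #3` rather than a separate
// shift followed by an add (exact output depends on the compiler):
#include <cstdint>

namespace shifter_operand_sketch {

inline int64_t AddShifted(int64_t a, int64_t b) {
  return a + (b << 3);  // candidate for an HArm64DataProcWithShifterOp(add, LSL #3) pattern
}

}  // namespace shifter_operand_sketch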
+ bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction)); + // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` + // does *not* support extension. This is because the `extended register` form + // of the `sub` instruction interprets the left register with code 31 as the + // stack pointer and not the zero register. (So does the `immediate` form.) In + // the other form `shifted register, the register with code 31 is interpreted + // as the zero register. + return instruction->IsAdd() || instruction->IsSub(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h new file mode 100644 index 0000000000..b459d24d7c --- /dev/null +++ b/compiler/optimizing/common_dominator.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ +#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ + +#include "nodes.h" + +namespace art { + +// Helper class for finding common dominators of two or more blocks in a graph. +// The domination information of a graph must not be modified while there is +// a CommonDominator object as it's internal state could become invalid. +class CommonDominator { + public: + // Convenience function to find the common dominator of 2 blocks. + static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) { + CommonDominator finder(block1); + finder.Update(block2); + return finder.Get(); + } + + // Create a finder starting with a given block. + explicit CommonDominator(HBasicBlock* block) + : dominator_(block), chain_length_(ChainLength(block)) { + DCHECK(block != nullptr); + } + + // Update the common dominator with another block. + void Update(HBasicBlock* block) { + DCHECK(block != nullptr); + HBasicBlock* block2 = dominator_; + DCHECK(block2 != nullptr); + if (block == block2) { + return; + } + size_t chain_length = ChainLength(block); + size_t chain_length2 = chain_length_; + // Equalize the chain lengths + for ( ; chain_length > chain_length2; --chain_length) { + block = block->GetDominator(); + DCHECK(block != nullptr); + } + for ( ; chain_length2 > chain_length; --chain_length2) { + block2 = block2->GetDominator(); + DCHECK(block2 != nullptr); + } + // Now run up the chain until we hit the common dominator. 
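// [Illustrative sketch, not part of this change.] The loop that follows is the
// classic "equalize depths, then walk both chains up in lock-step" ancestor
// search. The same idea on a plain parent-pointer tree (stand-alone C++;
// `Node` is a made-up stand-in for HBasicBlock/GetDominator()):
#include <cstddef>

namespace common_dominator_sketch {

struct Node {
  Node* parent = nullptr;  // models HBasicBlock::GetDominator()
};

inline size_t Depth(const Node* n) {
  size_t d = 0;
  for (; n != nullptr; n = n->parent) ++d;
  return d;
}

inline Node* CommonAncestor(Node* a, Node* b) {
  size_t da = Depth(a);
  size_t db = Depth(b);
  for (; da > db; --da) a = a->parent;  // equalize the chain lengths
  for (; db > da; --db) b = b->parent;
  while (a != b) {                      // walk up in lock-step until the chains meet
    a = a->parent;
    b = b->parent;
  }
  return a;  // nullptr only if the two nodes were in disjoint trees
}

}  // namespace common_dominator_sketch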
+ while (block != block2) { + --chain_length; + block = block->GetDominator(); + DCHECK(block != nullptr); + block2 = block2->GetDominator(); + DCHECK(block2 != nullptr); + } + dominator_ = block; + chain_length_ = chain_length; + } + + HBasicBlock* Get() const { + return dominator_; + } + + private: + static size_t ChainLength(HBasicBlock* block) { + size_t result = 0; + while (block != nullptr) { + ++result; + block = block->GetDominator(); + } + return result; + } + + HBasicBlock* dominator_; + size_t chain_length_; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 9754043f32..02e5dab3d4 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -123,20 +123,21 @@ void HDeadCodeElimination::RemoveDeadBlocks() { } // If we removed at least one block, we need to recompute the full - // dominator tree. + // dominator tree and try block membership. if (removed_one_or_more_blocks) { graph_->ClearDominanceInformation(); graph_->ComputeDominanceInformation(); + graph_->ComputeTryBlockInformation(); } // Connect successive blocks created by dead branches. Order does not matter. for (HReversePostOrderIterator it(*graph_); !it.Done();) { HBasicBlock* block = it.Current(); - if (block->IsEntryBlock() || block->GetSuccessors().size() != 1u) { + if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) { it.Advance(); continue; } - HBasicBlock* successor = block->GetSuccessors()[0]; + HBasicBlock* successor = block->GetSingleSuccessor(); if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { it.Advance(); continue; @@ -176,10 +177,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { } void HDeadCodeElimination::Run() { - if (!graph_->HasTryCatch()) { - // TODO: Update dead block elimination and enable for try/catch. - RemoveDeadBlocks(); - } + RemoveDeadBlocks(); SsaRedundantPhiElimination(graph_).Run(); RemoveDeadInstructions(); } diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc new file mode 100644 index 0000000000..65820630f8 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_arm.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace arm { + +/** + * Finds instructions that need the dex cache arrays base as an input. + */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. 
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HArmDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + // Ensure we only initialize the pointer once for each dex file. + auto lb = dex_cache_array_bases_.lower_bound(&dex_file); + if (lb != dex_cache_array_bases_.end() && + !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) { + return lb->second; + } + + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + dex_cache_array_bases_.PutBefore(lb, &dex_file, base); + return base; + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h new file mode 100644 index 0000000000..015f910328 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace arm { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 3de96b5d84..c16b872466 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -163,12 +163,12 @@ void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { } void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { - // Ensure that all exception handlers are catch blocks and that handlers - // are not listed multiple times. + ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers(); + + // Ensure that all exception handlers are catch blocks. // Note that a normal-flow successor may be a catch block before CFG // simplification. We only test normal-flow successors in SsaChecker. - for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) { - HBasicBlock* handler = it.Current(); + for (HBasicBlock* handler : handlers) { if (!handler->IsCatchBlock()) { AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which " "is not a catch block.", @@ -177,9 +177,13 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { try_boundary->GetId(), handler->GetBlockId())); } - if (current_block_->HasSuccessor(handler, it.CurrentSuccessorIndex() + 1)) { - AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.", - handler->GetBlockId(), + } + + // Ensure that handlers are not listed multiple times. + for (size_t i = 0, e = handlers.size(); i < e; ++i) { + if (ContainsElement(handlers, handlers[i], i + 1)) { + AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.", + handlers[i]->GetBlockId(), try_boundary->DebugName(), try_boundary->GetId())); } @@ -188,6 +192,21 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { VisitInstruction(try_boundary); } +void GraphChecker::VisitLoadException(HLoadException* load) { + // Ensure that LoadException is the first instruction in a catch block. 
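// [Illustrative sketch, not part of this change.] The two conditions verified
// below, expressed as a stand-alone predicate over a simplified block model
// (plain C++; `Block` and `Instruction` are made-up stand-ins for the HIR
// classes):
#include <string>

namespace load_exception_check_sketch {

struct Instruction;

struct Block {
  bool is_catch_block = false;
  Instruction* first_instruction = nullptr;
};

struct Instruction {
  Block* block = nullptr;
};

// Returns an empty string when the placement is valid, an error message otherwise.
inline std::string CheckLoadExceptionPlacement(const Instruction& load) {
  if (!load.block->is_catch_block) {
    return "LoadException is in a non-catch block.";
  }
  if (load.block->first_instruction != &load) {
    return "LoadException is not the first instruction in its catch block.";
  }
  return "";
}

}  // namespace load_exception_check_sketch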
+ if (!load->GetBlock()->IsCatchBlock()) { + AddError(StringPrintf("%s:%d is in a non-catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } else if (load->GetBlock()->GetFirstInstruction() != load) { + AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -242,10 +261,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } size_t use_index = use_it.Current()->GetIndex(); if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { - AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong " "UseListNode index.", use->DebugName(), use->GetId(), + instruction->DebugName(), instruction->GetId())); } } @@ -355,17 +375,14 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { // Ensure that catch blocks are not normal successors, and normal blocks are // never exceptional successors. - const size_t num_normal_successors = block->NumberOfNormalSuccessors(); - for (size_t j = 0; j < num_normal_successors; ++j) { - HBasicBlock* successor = block->GetSuccessors()[j]; + for (HBasicBlock* successor : block->GetNormalSuccessors()) { if (successor->IsCatchBlock()) { AddError(StringPrintf("Catch block %d is a normal successor of block %d.", successor->GetBlockId(), block->GetBlockId())); } } - for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) { - HBasicBlock* successor = block->GetSuccessors()[j]; + for (HBasicBlock* successor : block->GetExceptionalSuccessors()) { if (!successor->IsCatchBlock()) { AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.", successor->GetBlockId(), @@ -377,10 +394,14 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { // block with multiple successors to a block with multiple // predecessors). Exceptional edges are synthesized and hence // not accounted for. - if (block->NumberOfNormalSuccessors() > 1) { - for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) { - HBasicBlock* successor = block->GetSuccessors()[j]; - if (successor->GetPredecessors().size() > 1) { + if (block->GetSuccessors().size() > 1) { + for (HBasicBlock* successor : block->GetNormalSuccessors()) { + if (successor->IsExitBlock() && + block->IsSingleTryBoundary() && + block->GetPredecessors().size() == 1u && + block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) { + // Allowed critical edge Throw->TryBoundary->Exit. + } else if (successor->GetPredecessors().size() > 1) { AddError(StringPrintf("Critical edge between blocks %d and %d.", block->GetBlockId(), successor->GetBlockId())); @@ -445,12 +466,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); - // Ensure the pre-header block is first in the list of - // predecessors of a loop header. + // Ensure the pre-header block is first in the list of predecessors of a loop + // header and that the header block is its only successor. 
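// [Illustrative sketch, not part of this change.] The invariant being checked:
// the pre-header must be the header's first predecessor and must have the
// header as its single successor. Stand-alone model (plain C++; `Block` is a
// made-up stand-in, not the HIR class):
#include <vector>

namespace preheader_check_sketch {

struct Block {
  std::vector<Block*> predecessors;
  std::vector<Block*> successors;
};

inline bool IsWellFormedPreHeader(const Block& header, const Block& pre_header) {
  return !header.predecessors.empty() &&
         header.predecessors[0] == &pre_header &&  // first predecessor is the pre-header
         pre_header.successors.size() == 1u &&     // pre-header has a single successor...
         pre_header.successors[0] == &header;      // ...and that successor is the loop header
}

}  // namespace preheader_check_sketch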
if (!loop_header->IsLoopPreHeaderFirstPredecessor()) { AddError(StringPrintf( "Loop pre-header is not the first predecessor of the loop header %d.", id)); + } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) { + AddError(StringPrintf( + "Loop pre-header %d of loop defined by header %d has %zu successors.", + loop_information->GetPreHeader()->GetBlockId(), + id, + loop_information->GetPreHeader()->GetSuccessors().size())); } // Ensure the loop header has only one incoming branch and the remaining @@ -493,6 +520,13 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { "Loop defined by header %d has an invalid back edge %d.", id, back_edge_id)); + } else if (back_edge->GetLoopInformation() != loop_information) { + AddError(StringPrintf( + "Back edge %d of loop defined by header %d belongs to nested loop " + "with header %d.", + back_edge_id, + id, + back_edge->GetLoopInformation()->GetHeader()->GetBlockId())); } } } @@ -531,10 +565,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); if (!use->IsPhi() && !instruction->StrictlyDominates(use)) { - AddError(StringPrintf("Instruction %d in block %d does not dominate " - "use %d in block %d.", - instruction->GetId(), current_block_->GetBlockId(), - use->GetId(), use->GetBlock()->GetBlockId())); + AddError(StringPrintf("Instruction %s:%d in block %d does not dominate " + "use %s:%d in block %d.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId(), + use->DebugName(), + use->GetId(), + use->GetBlock()->GetBlockId())); } } @@ -697,26 +735,31 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } - // Test phi equivalents. There should not be two of the same type and they - // should only be created for constants which were untyped in DEX. - for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* other_phi = phi_it.Current()->AsPhi(); - if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { - if (phi->GetType() == other_phi->GetType()) { - std::stringstream type_str; - type_str << phi->GetType(); - AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", - phi->GetId(), - phi->GetRegNumber(), - type_str.str().c_str())); - } else { - ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); - if (!IsConstantEquivalent(phi, other_phi, &visited)) { - AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " - "are not equivalents of constants.", + // Test phi equivalents. There should not be two of the same type and they should only be + // created for constants which were untyped in DEX. Note that this test can be skipped for + // a synthetic phi (indicated by lack of a virtual register). 
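// [Illustrative sketch, not part of this change.] What "equivalent phis" means
// here: two phis mapped to the same DEX virtual register may coexist only when
// they differ in type (e.g. an int/float split of an untyped constant); two
// phis with the same vreg *and* the same type are an error, and synthetic phis
// (no vreg) are exempt from the test. A stand-alone duplicate scan over a
// simplified model (plain C++; `PhiStub` and kNoVReg are made-up stand-ins):
#include <set>
#include <utility>
#include <vector>

namespace phi_equivalent_sketch {

enum class Type { kInt, kFloat, kLong, kDouble, kRef };
constexpr int kNoVReg = -1;  // models kNoRegNumber for synthetic phis

struct PhiStub {
  int vreg;
  Type type;
};

inline bool HasSameTypedDuplicate(const std::vector<PhiStub>& phis) {
  std::set<std::pair<int, Type>> seen;
  for (const PhiStub& phi : phis) {
    if (phi.vreg == kNoVReg) continue;  // synthetic phi: skip the equivalence test
    if (!seen.insert({phi.vreg, phi.type}).second) {
      return true;  // same vreg and same type: not a legal equivalent
    }
  }
  return false;
}

}  // namespace phi_equivalent_sketch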
+ if (phi->GetRegNumber() != kNoRegNumber) { + for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); + !phi_it.Done(); + phi_it.Advance()) { + HPhi* other_phi = phi_it.Current()->AsPhi(); + if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { + if (phi->GetType() == other_phi->GetType()) { + std::stringstream type_str; + type_str << phi->GetType(); + AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", phi->GetId(), - other_phi->GetId(), - phi->GetRegNumber())); + phi->GetRegNumber(), + type_str.str().c_str())); + } else { + ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); + if (!IsConstantEquivalent(phi, other_phi, &visited)) { + AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " + "are not equivalents of constants.", + phi->GetId(), + other_phi->GetId(), + phi->GetRegNumber())); + } } } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index abf3659d91..d5ddbabc8c 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -50,6 +50,9 @@ class GraphChecker : public HGraphDelegateVisitor { // Check successors of blocks ending in TryBoundary. void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; + // Check that LoadException is the first instruction in a catch block. + void VisitLoadException(HLoadException* load) OVERRIDE; + // Check that HCheckCast and HInstanceOf have HLoadClass as second input. void VisitCheckCast(HCheckCast* check) OVERRIDE; void VisitInstanceOf(HInstanceOf* check) OVERRIDE; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 4111671a9b..e9fdb84d1e 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -24,6 +24,7 @@ #include "code_generator.h" #include "dead_code_elimination.h" #include "disassembler.h" +#include "inliner.h" #include "licm.h" #include "nodes.h" #include "optimization.h" @@ -252,8 +253,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintSuccessors(HBasicBlock* block) { AddIndent(); output_ << "successors"; - for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) { - HBasicBlock* successor = block->GetSuccessors()[i]; + for (HBasicBlock* successor : block->GetNormalSuccessors()) { output_ << " \"B" << successor->GetBlockId() << "\" "; } output_<< std::endl; @@ -262,8 +262,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void PrintExceptionHandlers(HBasicBlock* block) { AddIndent(); output_ << "xhandlers"; - for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) { - HBasicBlock* handler = block->GetSuccessors()[i]; + for (HBasicBlock* handler : block->GetExceptionalSuccessors()) { output_ << " \"B" << handler->GetBlockId() << "\" "; } if (block->IsExitBlock() && @@ -394,9 +393,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { VisitInvoke(invoke); - StartAttributeStream("recursive") << std::boolalpha - << invoke->IsRecursive() - << std::noboolalpha; + StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind(); + StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); + if (invoke->IsStatic()) { + StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement(); + } + } + + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + VisitInvoke(invoke); 
StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); } @@ -420,13 +425,21 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } - bool IsPass(const char* name) { - return strcmp(pass_name_, name) == 0; +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + StartAttributeStream("shift") << instruction->GetShiftAmount(); + } + } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); } +#endif - bool IsReferenceTypePropagationPass() { - return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName) - != nullptr; + bool IsPass(const char* name) { + return strcmp(pass_name_, name) == 0; } void PrintInstruction(HInstruction* instruction) { @@ -492,7 +505,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } - } else if (IsReferenceTypePropagationPass() + } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + || IsPass(HInliner::kInlinerPassName)) && (instruction->GetType() == Primitive::kPrimNot)) { ReferenceTypeInfo info = instruction->IsLoadClass() ? instruction->AsLoadClass()->GetLoadedClassRTI() @@ -505,6 +519,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; + } else if (instruction->IsNullConstant()) { + // The NullConstant may be added to the graph during other passes that happen between + // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner + // doesn't run or doesn't inline anything, the NullConstant remains untyped. + // So we should check NullConstants for validity only after reference type propagation. + // + // Note: The infrastructure to properly type NullConstants everywhere is to complex to add + // for the benefits. + StartAttributeStream("klass") << "not_set"; + DCHECK(!is_after_pass_ + || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) + << " Expected a valid rti after reference type propagation"; } else { DCHECK(!is_after_pass_) << "Expected a valid rti after reference type propagation"; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c36de84064..4af111b784 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { - set->Kill(current->GetSideEffects()); // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); + // Do not kill the set with the side effects of the instruction just now: if + // the instruction is GVN'ed, we don't need to kill. 
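// [Illustrative sketch, not part of this change.] The ordering matters: an
// instruction that gets GVN'ed away never executes, so its side effects must
// not invalidate entries already in the set. A simplified value-numbering loop
// with the same structure (plain C++; `Instr` and `ValueSet` are made-up
// stand-ins for the HIR and the GVN set):
#include <unordered_map>
#include <vector>

namespace gvn_order_sketch {

struct Instr {
  int hash;               // stands in for the HIR hash/equality used by GVN
  unsigned side_effects;  // non-zero means the instruction writes something
  bool movable;
  bool removed = false;
};

struct ValueSet {
  std::unordered_map<int, Instr*> entries;
  void Kill(unsigned effects) {
    if (effects != 0u) entries.clear();  // crude model: any write kills everything
  }
  Instr* Lookup(const Instr& i) {
    auto it = entries.find(i.hash);
    return it == entries.end() ? nullptr : it->second;
  }
  void Add(Instr* i) { entries[i->hash] = i; }
};

inline void VisitBlock(std::vector<Instr>& instrs, ValueSet& set) {
  for (Instr& current : instrs) {
    if (current.movable) {
      if (Instr* existing = set.Lookup(current)) {
        (void)existing;
        current.removed = true;          // GVN'ed: it never runs, so no kill is needed
      } else {
        set.Kill(current.side_effects);  // kill only when the instruction survives
        set.Add(&current);
      }
    } else {
      set.Kill(current.side_effects);
    }
  }
}

}  // namespace gvn_order_sketch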
if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) @@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { + set->Kill(current->GetSideEffects()); set->Add(current); } + } else { + set->Kill(current->GetSideEffects()); } current = next; } diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index b7262f6b29..5de94f43c9 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -69,10 +69,13 @@ class InductionVarAnalysisTest : public testing::Test { entry_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(entry_); BuildForLoop(0, n); + return_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(return_); exit_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(exit_); entry_->AddSuccessor(loop_preheader_[0]); - loop_header_[0]->AddSuccessor(exit_); + loop_header_[0]->AddSuccessor(return_); + return_->AddSuccessor(exit_); graph_->SetEntryBlock(entry_); graph_->SetExitBlock(exit_); @@ -91,6 +94,7 @@ class InductionVarAnalysisTest : public testing::Test { entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_)); dum_ = new (&allocator_) HLocal(n + 2); entry_->AddInstruction(dum_); + return_->AddInstruction(new (&allocator_) HReturnVoid()); exit_->AddInstruction(new (&allocator_) HExit()); // Provide loop instructions. @@ -177,6 +181,7 @@ class InductionVarAnalysisTest : public testing::Test { // Fixed basic blocks and instructions. HBasicBlock* entry_; + HBasicBlock* return_; HBasicBlock* exit_; HInstruction* parameter_; // "this" HInstruction* constant0_; diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 5530d261d2..2ac1e152e1 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -75,10 +75,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } -static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) { - DCHECK(preheader != nullptr); +/** Helper method to insert an instruction. 
*/ +static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { + DCHECK(block != nullptr); + DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId(); DCHECK(instruction != nullptr); - preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction()); + block->InsertInstructionBefore(instruction, block->GetLastInstruction()); return instruction; } @@ -91,48 +93,98 @@ InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) DCHECK(induction_analysis != nullptr); } -InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context, - HInstruction* instruction) { - return GetInduction(context, instruction, /* is_min */ true); -} - -InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context, - HInstruction* instruction) { - return SimplifyMax(GetInduction(context, instruction, /* is_min */ false)); +void InductionVarRange::GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test) { + HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Find range. + *min_val = GetVal(info, trip, in_body, /* is_min */ true); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + } else { + // No loop to analyze. + *min_val = Value(); + *max_val = Value(); + *needs_finite_test = false; + } } bool InductionVarRange::CanGenerateCode(HInstruction* context, HInstruction* instruction, - /*out*/bool* top_test) { - return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test); + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { + return GenerateCode(context, + instruction, + nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet + needs_finite_test, + needs_taken_test); } -bool InductionVarRange::GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper) { - return GenerateCode(context, instruction, graph, block, lower, upper, nullptr); +void InductionVarRange::GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper) { + bool b1, b2; // unused + if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } +} + +void InductionVarRange::GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test) { + bool b1, b2; // unused + if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } } // // Private class methods. 
// -InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context, - HInstruction* instruction, - bool is_min) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - return GetVal(induction_analysis_->LookupInfo(loop, instruction), - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()), - in_body, - is_min); +bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + if (info != nullptr) { + if (info->induction_class == HInductionVarAnalysis::kLinear) { + return true; + } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) { + return NeedsTripCount(info->op_b); + } } - return Value(); + return false; +} + +bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBody || + trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe; + } + } + return false; +} + +bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe || + trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe; + } + } + return false; } InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, @@ -184,11 +236,13 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct case HInductionVarAnalysis::kFetch: return GetFetch(info->fetch, trip, in_body, is_min); case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! return GetVal(info->op_a, trip, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { return Value(0); } else if (in_body) { @@ -356,25 +410,46 @@ bool InductionVarRange::GenerateCode(HInstruction* context, HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - /*out*/bool* top_test) { + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop if (loop != nullptr) { + // Set up loop information. HBasicBlock* header = loop->GetHeader(); bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // nothing to analyze + } HInductionVarAnalysis::InductionInfo* trip = induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - if (info != nullptr && trip != nullptr) { - if (top_test != nullptr) { - *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop; + // Determine what tests are needed. A finite test is needed if the evaluation code uses the + // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" + // the computed range). 
A taken test is needed for any unknown trip-count, even if evaluation + // code does not use the trip-count explicitly (since there could be an implicit relation + // between e.g. an invariant subscript and a not-taken condition). + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_taken_test = IsBodyTripCount(trip); + // Code generation for taken test: generate the code when requested or otherwise analyze + // if code generation is feasible when taken test is needed. + if (taken_test != nullptr) { + return GenerateCode( + trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + } else if (*needs_taken_test) { + if (!GenerateCode( + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + return false; } - return + } + // Code generation for lower and upper. + return // Success on lower if invariant (not set), or code can be generated. ((info->induction_class == HInductionVarAnalysis::kInvariant) || GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && // And success on upper. GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); - } } return false; } @@ -387,19 +462,38 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, bool in_body, bool is_min) { if (info != nullptr) { + // Handle current operation. Primitive::Type type = Primitive::kPrimInt; HInstruction* opa = nullptr; HInstruction* opb = nullptr; - int32_t value = 0; switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: // Invariants. switch (info->operation) { case HInductionVarAnalysis::kAdd: + case HInductionVarAnalysis::kLT: + case HInductionVarAnalysis::kLE: + case HInductionVarAnalysis::kGT: + case HInductionVarAnalysis::kGE: if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) && GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + HInstruction* operation = nullptr; + switch (info->operation) { + case HInductionVarAnalysis::kAdd: + operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + case HInductionVarAnalysis::kLT: + operation = new (graph->GetArena()) HLessThan(opa, opb); break; + case HInductionVarAnalysis::kLE: + operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break; + case HInductionVarAnalysis::kGT: + operation = new (graph->GetArena()) HGreaterThan(opa, opb); break; + case HInductionVarAnalysis::kGE: + operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break; + default: + LOG(FATAL) << "unknown operation"; + } + *result = Insert(block, operation); } return true; } @@ -422,16 +516,21 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } break; case HInductionVarAnalysis::kFetch: - if (graph != nullptr) { - *result = info->fetch; // already in HIR + if (info->fetch->GetType() == type) { + if (graph != nullptr) { + *result = info->fetch; // already in HIR + } + return true; } - return true; + break; case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! 
return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { if (graph != nullptr) { *result = graph->GetIntConstant(0); @@ -452,23 +551,44 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, break; } break; - case HInductionVarAnalysis::kLinear: + case HInductionVarAnalysis::kLinear: { // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only // to avoid arithmetic wrap-around situations that are hard to guard against. - if (GetConstant(info->op_a, &value)) { - if (value == 1 || value == -1) { - const bool is_min_a = value == 1 ? is_min : !is_min; + int32_t stride_value = 0; + if (GetConstant(info->op_a, &stride_value)) { + if (stride_value == 1 || stride_value == -1) { + const bool is_min_a = stride_value == 1 ? is_min : !is_min; if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else { + oper = new (graph->GetArena()) HSub(type, opb, opa); + } + *result = Insert(block, oper); } return true; } } } break; - default: // TODO(ajcbik): add more cases + } + case HInductionVarAnalysis::kWrapAround: + case HInductionVarAnalysis::kPeriodic: { + // Wrap-around and periodic inductions are restricted to constants only, so that extreme + // values are easy to test at runtime without complications of arithmetic wrap-around. + Value extreme = GetVal(info, trip, in_body, is_min); + if (extreme.is_known && extreme.a_constant == 0) { + if (graph != nullptr) { + *result = graph->GetIntConstant(extreme.b_constant); + } + return true; + } + break; + } + default: break; } } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 7fa5a26dce..7984871b08 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -57,29 +57,33 @@ class InductionVarRange { explicit InductionVarRange(HInductionVarAnalysis* induction); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, lower bound on the instruction's value. + * Given a context denoted by the first instruction, returns a possibly conservative + * lower and upper bound on the instruction's value in the output parameters min_val + * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test + * is needed to protect the range evaluation inside its loop. */ - Value GetMinInduction(HInstruction* context, HInstruction* instruction); + void GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, upper bound on the instruction's value. + * Returns true if range analysis is able to generate code for the lower and upper + * bound expressions on the instruction in the given context. The need_finite_test + * and need_taken test flags denote if an additional finite-test and/or taken-test + * are needed to protect the range evaluation inside its loop. 
*/ - Value GetMaxInduction(HInstruction* context, HInstruction* instruction); - - /** - * Returns true if range analysis is able to generate code for the lower and upper bound - * expressions on the instruction in the given context. Output parameter top_test denotes - * whether a top test is needed to protect the trip-count expression evaluation. - */ - bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test); + bool CanGenerateCode(HInstruction* context, + HInstruction* instruction, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); /** * Generates the actual code in the HIR for the lower and upper bound expressions on the * instruction in the given context. Code for the lower and upper bound expression are - * generated in given block and graph and are returned in lower and upper, respectively. - * For a loop invariant, lower is not set. + * generated in given block and graph and are returned in the output parameters lower and + * upper, respectively. For a loop invariant, lower is not set. * * For example, given expression x+i with range [0, 5] for i, calling this method * will generate the following sequence: @@ -87,20 +91,35 @@ class InductionVarRange { * block: * lower: add x, 0 * upper: add x, 5 + * + * Precondition: CanGenerateCode() returns true. */ - bool GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper); + void GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper); + + /** + * Generates explicit taken-test for the loop in the given context. Code is generated in + * given block and graph. The taken-test is returned in parameter test. + * + * Precondition: CanGenerateCode() returns true and needs_taken_test is set. + */ + void GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test); private: // // Private helper methods. // - Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min); + static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info); + static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip); + static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip); static Value GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, @@ -130,8 +149,8 @@ class InductionVarRange { static Value MergeVal(Value v1, Value v2, bool is_min); /** - * Generates code for lower/upper expression in the HIR. Returns true on success. - * With graph == nullptr, the method can be used to determine if code generation + * Generates code for lower/upper/taken-test in the HIR. Returns true on success. + * With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. 
*/ bool GenerateCode(HInstruction* context, @@ -140,7 +159,9 @@ class InductionVarRange { HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - bool* top_test); + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index ce8926ad72..c2ba157ed8 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -46,6 +46,10 @@ class InductionVarRangeTest : public testing::Test { EXPECT_EQ(v1.is_known, v2.is_known); } + // + // Construction methods. + // + /** Constructs bare minimum graph. */ void BuildGraph() { graph_->SetNumberOfVRegs(1); @@ -58,7 +62,7 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs loop with given upper bound. */ - void BuildLoop(HInstruction* upper) { + void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) { // Control flow. loop_preheader_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_); @@ -66,29 +70,37 @@ class InductionVarRangeTest : public testing::Test { graph_->AddBlock(loop_header); HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_body); + HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(return_block); entry_block_->AddSuccessor(loop_preheader_); loop_preheader_->AddSuccessor(loop_header); loop_header->AddSuccessor(loop_body); - loop_header->AddSuccessor(exit_block_); + loop_header->AddSuccessor(return_block); loop_body->AddSuccessor(loop_header); + return_block->AddSuccessor(exit_block_); // Instructions. HLocal* induc = new (&allocator_) HLocal(0); entry_block_->AddInstruction(induc); loop_preheader_->AddInstruction( - new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0))); // i = 0 + new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower))); // i = l loop_preheader_->AddInstruction(new (&allocator_) HGoto()); HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_header->AddInstruction(load); - condition_ = new (&allocator_) HLessThan(load, upper); + if (stride > 0) { + condition_ = new (&allocator_) HLessThan(load, upper); // i < u + } else { + condition_ = new (&allocator_) HGreaterThan(load, upper); // i > u + } loop_header->AddInstruction(condition_); - loop_header->AddInstruction(new (&allocator_) HIf(condition_)); // i < u + loop_header->AddInstruction(new (&allocator_) HIf(condition_)); load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_body->AddInstruction(load); - increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1)); + increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride)); loop_body->AddInstruction(increment_); - loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i++ + loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i += s loop_body->AddInstruction(new (&allocator_) HGoto()); - exit_block_->AddInstruction(new (&allocator_) HReturnVoid()); + return_block->AddInstruction(new (&allocator_) HReturnVoid()); + exit_block_->AddInstruction(new (&allocator_) HExit()); } /** Performs induction variable analysis. 
*/ @@ -124,8 +136,20 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs a trip-count. */ - HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) { - return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) { + if (in_loop && safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + } else if (in_loop) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr); + } else if (safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr); + } else { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr); + } } /** Constructs a linear a * i + b induction. */ @@ -139,16 +163,34 @@ class InductionVarRangeTest : public testing::Test { HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi)); } + /** Constructs a wrap-around induction consisting of a constant, followed info */ + HInductionVarAnalysis::InductionInfo* CreateWrapAround( + int32_t initial, + HInductionVarAnalysis::InductionInfo* info) { + return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info); + } + /** Constructs a wrap-around induction consisting of a constant, followed by a range. */ HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) { - return iva_->CreateInduction( - HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi)); + return CreateWrapAround(initial, CreateRange(lo, hi)); } // // Relay methods. // + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + return InductionVarRange::NeedsTripCount(info); + } + + bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsBodyTripCount(trip); + } + + bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsUnsafeTripCount(trip); + } + Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); @@ -202,6 +244,26 @@ class InductionVarRangeTest : public testing::Test { // Tests on static methods. 
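// For reference (illustrative comment, not part of this patch): the two flags of
// CreateTripCount(tc, in_loop, safe) above select among the four trip-count kinds:
//   in_loop  && safe   -> kTripCountInLoop
//   in_loop  && !safe  -> kTripCountInLoopUnsafe
//   !in_loop && safe   -> kTripCountInBody
//   !in_loop && !safe  -> kTripCountInBodyUnsafe
// which is exactly what the TripCountProperties test below exercises.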
// +TEST_F(InductionVarRangeTest, TripCountProperties) { + EXPECT_FALSE(NeedsTripCount(nullptr)); + EXPECT_FALSE(NeedsTripCount(CreateConst(1))); + EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1))); + EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3))); + EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1)))); + + EXPECT_FALSE(IsBodyTripCount(nullptr)); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true))); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false))); + + EXPECT_FALSE(IsUnsafeTripCount(nullptr)); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false))); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false))); +} + TEST_F(InductionVarRangeTest, GetMinMaxNull) { ExpectEqual(Value(), GetMin(nullptr, nullptr)); ExpectEqual(Value(), GetMax(nullptr, nullptr)); @@ -279,10 +341,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxFetch) { } TEST_F(InductionVarRangeTest, GetMinMaxLinear) { - ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100))); - ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100))); + ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true))); } TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) { @@ -398,61 +460,98 @@ TEST_F(InductionVarRangeTest, MaxValue) { // Tests on instance methods. // -TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) { - BuildLoop(graph_->GetIntConstant(1000)); +TEST_F(InductionVarRangeTest, ConstantTripCountUp) { + BuildLoop(0, graph_->GetIntConstant(1000), 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + // In context of header: known. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0))); + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. 
- ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); } -TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) { - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); - entry_block_->AddInstruction(parameter); - BuildLoop(parameter); +TEST_F(InductionVarRangeTest, ConstantTripCountDown) { + BuildLoop(1000, graph_->GetIntConstant(0), -1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); - // In context of header: full range unknown. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0))); + Value v1, v2; + bool needs_finite_test = true; + + // In context of header: known. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. - ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); } -TEST_F(InductionVarRangeTest, CodeGeneration) { +TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { HInstruction* parameter = new (&allocator_) HParameterValue( graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); entry_block_->AddInstruction(parameter); - BuildLoop(parameter); + BuildLoop(0, parameter, 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: upper unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(), v2); + + // In context of loop-body: known. 
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(parameter, 1, -1), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(parameter, 1, 0), v2); + HInstruction* lower = nullptr; HInstruction* upper = nullptr; - bool top_test = false; + HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test)); - ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test)); - EXPECT_TRUE(top_test); + EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); // Generates code. - EXPECT_TRUE(range.GenerateCode( - increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper)); + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); @@ -462,7 +561,7 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue()); - // Verify upper is (V-1)+0 + // Verify upper is (V-1)+0. ASSERT_TRUE(upper != nullptr); ASSERT_TRUE(upper->IsAdd()); ASSERT_TRUE(upper->InputAt(0)->IsSub()); @@ -471,6 +570,91 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue()); ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 0<V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsLessThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); +} + +TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { + HInstruction* parameter = new (&allocator_) HParameterValue( + graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(parameter); + BuildLoop(1000, parameter, -1); + PerformInductionVarAnalysis(); + InductionVarRange range(iva_); + + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: lower unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(), v1); + ExpectEqual(Value(1000), v2); + + // In context of loop-body: known. + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 0), v1); + ExpectEqual(Value(999), v2); + + HInstruction* lower = nullptr; + HInstruction* upper = nullptr; + HInstruction* taken = nullptr; + + // Can generate code in context of loop-body only. 
+ EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); + + // Generates code. + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + + // Verify lower is 1000-(-(V-1000)-1). + ASSERT_TRUE(lower != nullptr); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + lower = lower->InputAt(1); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsNeg()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsSub()); + EXPECT_TRUE(lower->InputAt(0)->IsParameterValue()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify upper is 1000-0. + ASSERT_TRUE(upper != nullptr); + ASSERT_TRUE(upper->IsSub()); + ASSERT_TRUE(upper->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); + EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 1000>V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsGreaterThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); } } // namespace art diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 353881e47a..6d93be37a7 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -148,7 +148,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol // the target method. Since we check above the exact type of the receiver, // the only reason this can happen is an IncompatibleClassChangeError. return nullptr; - } else if (resolved_method->IsAbstract()) { + } else if (!resolved_method->IsInvokable()) { // The information we had on the receiver was not enough to find // the target method. Since we check above the exact type of the receiver, // the only reason this can happen is an IncompatibleClassChangeError. @@ -192,6 +192,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { // We can query the dex cache directly. The verifier has populated it already. ArtMethod* resolved_method; if (invoke_instruction->IsInvokeStaticOrDirect()) { + if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) { + VLOG(compiler) << "Not inlining a String.<init> method"; + return false; + } MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod(); mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file) ? 
caller_compilation_unit_.GetDexCache().Get() @@ -406,8 +410,8 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &type_propagation, &sharpening, &simplify, - &dce, &fold, + &dce, }; for (size_t i = 0; i < arraysize(optimizations); ++i) { @@ -534,6 +538,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); } + // Check the integrity of reference types and run another type propagation if needed. if ((return_replacement != nullptr) && (return_replacement->GetType() == Primitive::kPrimNot)) { if (!return_replacement->GetReferenceTypeInfo().IsValid()) { @@ -544,10 +549,20 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, DCHECK(return_replacement->IsPhi()); size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); ReferenceTypeInfo::TypeHandle return_handle = - handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); + handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); } + + // If the return type is a refinement of the declared type run the type propagation again. + ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); + ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); + if (invoke_rti.IsStrictSupertypeOf(return_rti) + || (return_rti.IsExact() && !invoke_rti.IsExact()) + || !return_replacement->CanBeNull()) { + ReferenceTypePropagation rtp_fixup(graph_, handles_); + rtp_fixup.Run(); + } } return true; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index b97dc1a511..2f3df7fc68 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); - } else if (instruction->IsShl() && input_cst->IsOne()) { - // Replace Shl looking like - // SHL dst, src, 1 - // with - // ADD dst, src, src - HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), - input_other, - input_other); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); - RecordSimplification(); } } } @@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { - // Replace (bool_value == false) with !bool_value - block->ReplaceAndRemoveInstructionWith( - equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); + block->RemoveInstruction(equal); RecordSimplification(); } else { // Replace (bool_value == integer_not_zero_nor_one_constant) with false @@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { // We are comparing the boolean to a constant which is of type int and can // be any constant. 
if (input_const->AsIntConstant()->IsOne()) { - // Replace (bool_value != true) with !bool_value - block->ReplaceAndRemoveInstructionWith( - not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); + block->RemoveInstruction(not_equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value @@ -796,6 +784,34 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new(allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + } else if (IsPowerOfTwo(factor - 1)) { + // Transform code looking like + // MUL dst, src, (2^n + 1) + // into + // SHL tmp, src, n + // ADD dst, src, tmp + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1))); + HAdd* add = new (allocator) HAdd(type, input_other, shl); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } else if (IsPowerOfTwo(factor + 1)) { + // Transform code looking like + // MUL dst, src, (2^n - 1) + // into + // SHL tmp, src, n + // SUB dst, tmp, src + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1))); + HSub* sub = new (allocator) HSub(type, shl, input_other); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); } } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..6a34b13320 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -16,11 +16,16 @@ #include "instruction_simplifier_arm64.h" +#include "common_arm64.h" #include "mirror/array-inl.h" namespace art { namespace arm64 { +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; +using helpers::ShifterOperandSupportsExtension; + void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, @@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use)); + DCHECK(use->IsBinaryOperation() || use->IsNeg()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + HInstruction* left; + HInstruction* right; + if (use->IsBinaryOperation()) { + left = use->InputAt(0); + right = use->InputAt(1); + } else { + DCHECK(use->IsNeg()); + right = use->AsNeg()->InputAt(0); + left = GetGraph()->GetConstant(right->GetType(), 0); + } + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? 
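// Quick sanity check (illustrative, not part of this patch) of the MUL
// strength reduction added in instruction_simplifier.cc above:
//   MUL dst, src, (2^n + 1)  =>  SHL tmp, src, n; ADD dst, src, tmp
//   MUL dst, src, (2^n - 1)  =>  SHL tmp, src, n; SUB dst, tmp, src
static_assert((5 << 3) + 5 == 5 * 9, "x * (2^n + 1) == (x << n) + x");
static_assert((5 << 3) - 5 == 5 * 7, "x * (2^n - 1) == (x << n) - x");
// The surrounding ARM64-specific code goes one step further and folds such a
// shift directly into the shifter operand of its user, e.g. turning
//   lsl w1, w2, #3 ; add w0, w3, w1   into   add w0, w3, w2, lsl #3.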
+ return false; + } + + bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HArm64DataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && + !ShifterOperandSupportsExtension(use)) { + return false; + } + + if (do_merge) { + HArm64DataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().IsEmpty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } + RecordSimplification(); + } + + return true; +} + +// Merge a bitfield move instruction into its uses if it can be merged in all of them. +bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + if (!HasShifterOperand(use)) { + return false; + } + if (!CanMergeIntoShifterOperand(use, bitfield_op)) { + return false; + } + } + + // Merge the instruction into its uses. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + bool merged = MergeIntoShifterOperand(use, bitfield_op); + DCHECK(merged); + } + + return true; +} + +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. 
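// (Illustration, not part of this patch: the multiply-accumulate rewrites
//  above rely on the identities a * (b + 1) == (a * b) + a and
//  a * (1 - b) == a - (a * b); e.g. 3 * (4 + 1) == 12 + 3 == 15 and
//  3 * (1 - 4) == 3 - 12 == -9.)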
+ return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return false; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { Primitive::ComponentSize(instruction->GetComponentType())); } +void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. + DCHECK_NE(binop_left, binop_right); + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. 
+ if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + +void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if (input_type == result_type) { + // We let the arch-independent code handle this. + return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* array, HInstruction* index, int access_size); + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, true); + } + + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index dbe75249be..834081188b 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -89,10 +89,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } } -static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips) { - return Intrinsics::kNone; - } +static Intrinsics GetIntrinsic(InlineMethod method) { switch (method.opcode) { // Floating-point conversions. 
case kIntrinsicDoubleCvt: @@ -387,7 +384,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile // InvokeStaticOrDirect. InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? - invoke->AsInvokeStaticOrDirect()->GetInvokeType() : + invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() : invoke->IsInvokeVirtual() ? kVirtual : kSuper; switch (intrinsic_type) { case kStatic: @@ -431,7 +428,7 @@ void IntrinsicsRecognizer::Run() { DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file); DCHECK(inliner != nullptr); if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet()); + Intrinsics intrinsic = GetIntrinsic(method); if (intrinsic != Intrinsics::kNone) { if (!CheckInvokeType(intrinsic, invoke, dex_file)) { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0a5acc3e64..d2017da221 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -44,7 +44,23 @@ using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitor bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ assembler-> @@ -662,20 +678,23 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
+ Location trg_loc = locations->Out(); if (type == Primitive::kPrimLong) { - Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); __ add(IP, base, ShifterOperand(offset)); if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); __ ldrexd(trg_lo, trg_hi, IP); } else { __ ldrd(trg_lo, Address(IP)); } } else { - Register trg = locations->Out().AsRegister<Register>(); + Register trg = trg_loc.AsRegister<Register>(); __ ldr(trg, Address(base, offset)); } @@ -684,14 +703,18 @@ static void GenUnsafeGet(HInvoke* invoke, } if (type == Primitive::kPrimNot) { - Register trg = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -936,6 +959,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Bind(&loop_head); __ ldrex(tmp_lo, tmp_ptr); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -964,7 +988,11 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. if (kPoisonHeapReferences) { return; } @@ -1400,6 +1428,10 @@ static void CheckPosition(ArmAssembler* assembler, } } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathARM +// slow path. void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 059abf090d..b04dcceb05 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM64 slow path. 
Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ masm-> @@ -818,9 +834,12 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. - Register trg = RegisterFrom(locations->Out(), type); + Location base_loc = locations->InAt(1); + Register base = WRegisterFrom(base_loc); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. + Location trg_loc = locations->Out(); + Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); MemOperand mem_op(base.X(), offset); @@ -837,13 +856,18 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1057,6 +1081,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (use_acquire_release) { __ Bind(&loop_head); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1065,6 +1092,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1090,7 +1120,11 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. 
+ // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. if (kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 5efcf4eadf..326844526e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,323 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + + if (is64bit) { + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ Mfc1(out_lo, in); + __ Mfhc1(out_hi, in); + } else { + Register out = locations->Out().AsRegister<Register>(); + + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + __ Mtc1(in_lo, out); + __ Mthc1(in_hi, out); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, 
Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + MipsAssembler* assembler, + bool isR2OrNewer) { + DCHECK(type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong); + + if (type == Primitive::kPrimShort) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Wsbh(out, in); + __ Seh(out, out); + } else { + __ Sll(TMP, in, 24); + __ Sra(TMP, TMP, 16); + __ Sll(out, in, 16); + __ Srl(out, out, 24); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + } else { + // MIPS32r1 + // __ Rotr(out, in, 16); + __ Sll(TMP, in, 16); + __ Srl(out, in, 16); + __ Or(out, out, TMP); + // __ Wsbh(out, out); + __ LoadConst32(AT, 0x00FF00FF); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 8); + __ Srl(out, out, 8); + __ And(out, out, AT); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimLong) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + if (isR2OrNewer) { + __ Rotr(AT, in_hi, 16); + __ Rotr(TMP, in_lo, 16); + __ Wsbh(out_lo, AT); + __ Wsbh(out_hi, TMP); + } else { + // When calling CreateIntToIntLocations() we promised that the + // use of the out_lo/out_hi wouldn't overlap with the use of + // in_lo/in_hi. Be very careful not to write to out_lo/out_hi + // until we're completely done reading from in_lo/in_hi. + // __ Rotr(TMP, in_lo, 16); + __ Sll(TMP, in_lo, 16); + __ Srl(AT, in_lo, 16); + __ Or(TMP, TMP, AT); // Hold in TMP until it's safe + // to write to out_hi. + // __ Rotr(out_lo, in_hi, 16); + __ Sll(AT, in_hi, 16); + __ Srl(out_lo, in_hi, 16); // Here we are finally done reading + // from in_lo/in_hi so it's okay to + // write to out_lo/out_hi. 
+ __ Or(out_lo, out_lo, AT); + // __ Wsbh(out_hi, out_hi); + __ LoadConst32(AT, 0x00FF00FF); + __ And(out_hi, TMP, AT); + __ Sll(out_hi, out_hi, 8); + __ Srl(TMP, TMP, 8); + __ And(TMP, TMP, AT); + __ Or(out_hi, out_hi, TMP); + // __ Wsbh(out_lo, out_lo); + __ And(TMP, out_lo, AT); // AT already holds the correct mask value + __ Sll(TMP, TMP, 8); + __ Srl(out_lo, out_lo, 8); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, out_lo, TMP); + } + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimInt, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimLong, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimShort, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { + MipsAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = locations->InAt(0).AsRegister<Register>(); + Register arg = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + MipsLabel loop; + MipsLabel end; + MipsLabel return_true; + MipsLabel return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. 
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst32(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqz(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beq(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bne(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. + __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bne(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqz(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 2 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded. + __ Bind(&loop); + __ Lw(out, TMP, value_offset); + __ Lw(temp2, temp3, value_offset); + __ Bne(out, temp2, &return_false); + __ Addiu(TMP, TMP, 4); + __ Addiu(temp3, temp3, 4); + __ Addiu(temp1, temp1, -2); + __ Bgtz(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst32(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst32(out, 0); + __ Bind(&end); +} + // Unimplemented intrinsics. 
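The String.equals fast path above short-circuits on identical or null references, checks the class and count fields, and then compares the character data two 16-bit chars per 32-bit load; as its in-line comments note, this is only safe because string data is zero-padded to its alignment. A minimal standalone C++ sketch of that word-at-a-time comparison (illustrative names, not ART code; it assumes both buffers are 4-byte aligned and zero-padded past `length`):

#include <cstdint>
#include <cstring>

// Compare two UTF-16 buffers two characters per iteration, mirroring the
// loop structure of the MIPS intrinsic above.
bool EqualsTwoCharsAtATime(const uint16_t* a, const uint16_t* b, int32_t length) {
  for (int32_t i = 0; i < length; i += 2) {
    uint32_t word_a;
    uint32_t word_b;
    std::memcpy(&word_a, a + i, sizeof(word_a));  // One 32-bit load covers two chars.
    std::memcpy(&word_b, b + i, sizeof(word_b));
    if (word_a != word_b) {
      return false;
    }
  }
  // An odd trailing character is handled by the zero padding on both sides.
  return true;
}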
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -148,15 +465,8 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat) -UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble) -UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits) -UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits) UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) @@ -204,7 +514,6 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(StringCharAt) UNIMPLEMENTED_INTRINSIC(StringCompareTo) -UNIMPLEMENTED_INTRINSIC(StringEquals) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 05c7eb02d9..ecee11dea6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -101,11 +101,10 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0)); - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } else { - UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; - UNREACHABLE(); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); } + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); @@ -116,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -807,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1257,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); @@ -1392,6 +1391,108 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. 
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); + GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); + + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst64(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqzc(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beqc(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bnec(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. + __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bnec(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqzc(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 4 characters at a time. + DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. + __ Bind(&loop); + __ Ld(out, TMP, value_offset); + __ Ld(temp2, temp3, value_offset); + __ Bnec(out, temp2, &return_false); + __ Daddiu(TMP, TMP, 8); + __ Daddiu(temp3, temp3, 8); + __ Addiu(temp1, temp1, -4); + __ Bgtzc(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst64(out, 1); + __ Bc(&end); + + // Return false and exit the function. 
+ __ Bind(&return_false); + __ LoadConst64(out, 0); + __ Bind(&end); +} + static void GenerateStringIndexOf(HInvoke* invoke, Mips64Assembler* assembler, CodeGeneratorMIPS64* codegen, @@ -1413,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } @@ -1587,8 +1688,6 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(StringEquals) - UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 040bf6a45e..371588fc47 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -55,7 +55,23 @@ ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. 
+ invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -1571,26 +1587,32 @@ void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>())); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile, X86Assembler* assembler) { - Register base = locations->InAt(1).AsRegister<Register>(); - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); - Location output = locations->Out(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorX86* codegen) { + X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); + Location output_loc = locations->Out(); switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register output_reg = output.AsRegister<Register>(); - __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + Register output = output_loc.AsRegister<Register>(); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(output_reg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; } case Primitive::kPrimLong: { - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); + Register output_lo = output_loc.AsRegisterPairLow<Register>(); + Register output_hi = output_loc.AsRegisterPairHigh<Register>(); if (is_volatile) { // Need to use a XMM to read atomically. XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); @@ -1613,8 +1635,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long, bool is_volatile) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -1653,22 +1680,22 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1890,13 +1917,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to @@ -1929,8 +1961,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need - // scheduling barriers at this time. + // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we + // don't need scheduling barriers at this time. // Convert ZF into the boolean result. 
__ setb(kZero, out.AsRegister<Register>()); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 14c65c9aaf..2d9f01b821 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -50,8 +50,24 @@ ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() { bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); - const LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathX86_64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { @@ -917,6 +933,10 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathX86_64 +// slow path. void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1605,7 +1625,7 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); } static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { @@ -1698,23 +1718,30 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); } -static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, - bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { - CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); - CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); - CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile ATTRIBUTE_UNUSED, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + Location base_loc = locations->InAt(1); + CpuRegister base = base_loc.AsRegister<CpuRegister>(); + Location offset_loc = locations->InAt(2); + CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); + Location output_loc = locations->Out(); + CpuRegister output = locations->Out().AsRegister<CpuRegister>(); switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: - __ movl(trg, 
Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); } break; case Primitive::kPrimLong: - __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; default: @@ -1724,8 +1751,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type, } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1754,22 +1786,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler()); + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); } @@ -1961,13 +1993,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. __ setcc(kZero, out); __ movzxb(out, out); + // In the case of the `UnsafeCASObject` intrinsic, accessing an + // object in the heap with LOCK CMPXCHG does not require a read + // barrier, as we do not keep a reference to this heap location. + // However, if heap poisoning is enabled, we need to unpoison the + // values that were poisoned earlier. 
if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need @@ -1992,7 +2029,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c LOG(FATAL) << "Unexpected CAS type " << type; } - // locked cmpxchg has full barrier semantics, and we don't need + // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. // Convert ZF into the boolean result. diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 47457dec7d..2bb769a430 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -42,12 +42,14 @@ class LICMTest : public testing::Test { loop_preheader_ = new (&allocator_) HBasicBlock(graph_); loop_header_ = new (&allocator_) HBasicBlock(graph_); loop_body_ = new (&allocator_) HBasicBlock(graph_); + return_ = new (&allocator_) HBasicBlock(graph_); exit_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(entry_); graph_->AddBlock(loop_preheader_); graph_->AddBlock(loop_header_); graph_->AddBlock(loop_body_); + graph_->AddBlock(return_); graph_->AddBlock(exit_); graph_->SetEntryBlock(entry_); @@ -57,8 +59,9 @@ class LICMTest : public testing::Test { entry_->AddSuccessor(loop_preheader_); loop_preheader_->AddSuccessor(loop_header_); loop_header_->AddSuccessor(loop_body_); - loop_header_->AddSuccessor(exit_); + loop_header_->AddSuccessor(return_); loop_body_->AddSuccessor(loop_header_); + return_->AddSuccessor(exit_); // Provide boiler-plate instructions. parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot); @@ -89,6 +92,7 @@ class LICMTest : public testing::Test { HBasicBlock* loop_preheader_; HBasicBlock* loop_header_; HBasicBlock* loop_body_; + HBasicBlock* return_; HBasicBlock* exit_; HInstruction* parameter_; // "this" diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 6fbb6823d6..5b89cfef5a 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,19 +119,10 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index), - may_become_unknown_(true) { + declaring_class_def_index_(declaring_class_def_index) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); - - if (ref_info->IsSingletonAndNotReturned()) { - // We try to track stores to singletons that aren't returned to eliminate the stores - // since values in singleton's fields cannot be killed due to aliasing. Those values - // can still be killed due to merging values since we don't build phi for merging heap - // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible. - may_become_unknown_ = false; - } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -148,21 +139,11 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } - // Returns true if this heap location's value may become unknown after it's - // set to a value, due to merge of values, or killed due to aliasing. 
- bool MayBecomeUnknown() const { - return may_become_unknown_; - } - void SetMayBecomeUnknown(bool val) { - may_become_unknown_ = val; - } - private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool may_become_unknown_; // value may become kUnknownHeapValue. DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -381,26 +362,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* field_access, - HInstruction* ref, - const FieldInfo& field_info, - bool is_store) { + void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); - // A store of a value may be eliminated if all future loads for that value can be eliminated. - // For a value that's stored into a singleton field, the value will not be killed due - // to aliasing. However if the value is set in a block that doesn't post dominate the definition, - // the value may be killed due to merging later. Before we have post dominating info, we check - // if the store is in the same block as the definition just to be conservative. - if (is_store && - location->GetReferenceInfo()->IsSingletonAndNotReturned() && - field_access->GetBlock() != ref->GetBlock()) { - location->SetMayBecomeUnknown(true); - } + GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ -409,20 +377,20 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true); + VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } @@ -464,9 +432,14 @@ class HeapLocationCollector : public HGraphVisitor { }; // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. +// A heap location can be set to kUnknownHeapValue when: +// - initially set a value. +// - killed due to aliasing, merging, invocation, or loop side effects. static HInstruction* const kUnknownHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1)); + // Default heap value after an allocation. 
+// A heap location can be set to that value right after an allocation. static HInstruction* const kDefaultHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2)); @@ -484,29 +457,17 @@ class LSEVisitor : public HGraphVisitor { kUnknownHeapValue, graph->GetArena()->Adapter(kArenaAllocLSE)), graph->GetArena()->Adapter(kArenaAllocLSE)), - removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), - substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)), + removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), + possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)), singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - int block_id = block->GetBlockId(); - ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + // Populate the heap_values array for this block. // TODO: try to reuse the heap_values array from one predecessor if possible. if (block->IsLoopHeader()) { - // We do a single pass in reverse post order. For loops, use the side effects as a hint - // to see if the heap values should be killed. - if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - // Leave all values as kUnknownHeapValue. - } else { - // Inherit the values from pre-header. - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; - } - } + HandleLoopSideEffects(block); } else { MergePredecessorValues(block); } @@ -515,23 +476,34 @@ class LSEVisitor : public HGraphVisitor { // Remove recorded instructions that should be eliminated. void RemoveInstructions() { - size_t size = removed_instructions_.size(); - DCHECK_EQ(size, substitute_instructions_.size()); + size_t size = removed_loads_.size(); + DCHECK_EQ(size, substitute_instructions_for_loads_.size()); for (size_t i = 0; i < size; i++) { - HInstruction* instruction = removed_instructions_[i]; - DCHECK(instruction != nullptr); - HInstruction* substitute = substitute_instructions_[i]; - if (substitute != nullptr) { - // Keep tracing substitute till one that's not removed. - HInstruction* sub_sub = FindSubstitute(substitute); - while (sub_sub != substitute) { - substitute = sub_sub; - sub_sub = FindSubstitute(substitute); - } - instruction->ReplaceWith(substitute); + HInstruction* load = removed_loads_[i]; + DCHECK(load != nullptr); + DCHECK(load->IsInstanceFieldGet() || + load->IsStaticFieldGet() || + load->IsArrayGet()); + HInstruction* substitute = substitute_instructions_for_loads_[i]; + DCHECK(substitute != nullptr); + // Keep tracing substitute till one that's not removed. + HInstruction* sub_sub = FindSubstitute(substitute); + while (sub_sub != substitute) { + substitute = sub_sub; + sub_sub = FindSubstitute(substitute); } - instruction->GetBlock()->RemoveInstruction(instruction); + load->ReplaceWith(substitute); + load->GetBlock()->RemoveInstruction(load); } + + // At this point, stores in possibly_removed_stores_ can be safely removed. 
+ size = possibly_removed_stores_.size(); + for (size_t i = 0; i < size; i++) { + HInstruction* store = possibly_removed_stores_[i]; + DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet()); + store->GetBlock()->RemoveInstruction(store); + } + // TODO: remove unnecessary allocations. // Eliminate instructions in singleton_new_instances_ that: // - don't have uses, @@ -541,6 +513,52 @@ class LSEVisitor : public HGraphVisitor { } private: + // If heap_values[index] is an instance field store, need to keep the store. + // This is necessary if a heap value is killed due to merging, or loop side + // effects (which is essentially merging also), since a load later from the + // location won't be eliminated. + void KeepIfIsStore(HInstruction* heap_value) { + if (heap_value == kDefaultHeapValue || + heap_value == kUnknownHeapValue || + !heap_value->IsInstanceFieldSet()) { + return; + } + auto idx = std::find(possibly_removed_stores_.begin(), + possibly_removed_stores_.end(), heap_value); + if (idx != possibly_removed_stores_.end()) { + // Make sure the store is kept. + possibly_removed_stores_.erase(idx); + } + } + + void HandleLoopSideEffects(HBasicBlock* block) { + DCHECK(block->IsLoopHeader()); + int block_id = block->GetBlockId(); + ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; + // We do a single pass in reverse post order. For loops, use the side effects as a hint + // to see if the heap values should be killed. + if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { + for (size_t i = 0; i < pre_header_heap_values.size(); i++) { + // heap value is killed by loop side effects, need to keep the last store. + KeepIfIsStore(pre_header_heap_values[i]); + } + if (kIsDebugBuild) { + // heap_values should all be kUnknownHeapValue that it is inited with. + for (size_t i = 0; i < heap_values.size(); i++) { + DCHECK_EQ(heap_values[i], kUnknownHeapValue); + } + } + } else { + // Inherit the values from pre-header. + for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + } + } + void MergePredecessorValues(HBasicBlock* block) { const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); if (predecessors.size() == 0) { @@ -548,16 +566,25 @@ class LSEVisitor : public HGraphVisitor { } ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { - HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i]; - if (value != kUnknownHeapValue) { + HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i]; + heap_values[i] = pred0_value; + if (pred0_value != kUnknownHeapValue) { for (size_t j = 1; j < predecessors.size(); j++) { - if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) { - value = kUnknownHeapValue; + HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i]; + if (pred_value != pred0_value) { + heap_values[i] = kUnknownHeapValue; break; } } } - heap_values[i] = value; + + if (heap_values[i] == kUnknownHeapValue) { + // Keep the last store in each predecessor since future loads cannot be eliminated. 
+ for (size_t j = 0; j < predecessors.size(); j++) { + ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()]; + KeepIfIsStore(pred_values[i]); + } + } } } @@ -616,21 +643,30 @@ class LSEVisitor : public HGraphVisitor { HInstruction* heap_value = heap_values[idx]; if (heap_value == kDefaultHeapValue) { HInstruction* constant = GetDefaultValue(instruction->GetType()); - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(constant); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(constant); heap_values[idx] = constant; return; } + if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) { + HInstruction* store = heap_value; + // This load must be from a singleton since it's from the same field + // that a "removed" store puts the value. That store must be to a singleton's field. + DCHECK(ref_info->IsSingleton()); + // Get the real heap value of the store. + heap_value = store->InputAt(1); + } if ((heap_value != kUnknownHeapValue) && // Keep the load due to possible I/F, J/D array aliasing. // See b/22538329 for details. (heap_value->GetType() == instruction->GetType())) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(heap_value); + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(heap_value); TryRemovingNullCheck(instruction); return; } + // Load isn't eliminated. if (heap_value == kUnknownHeapValue) { // Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. @@ -662,51 +698,63 @@ class LSEVisitor : public HGraphVisitor { ArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool redundant_store = false; + bool same_value = false; + bool possibly_redundant = false; if (Equal(heap_value, value)) { // Store into the heap location with the same value. - redundant_store = true; + same_value = true; } else if (index != nullptr) { // For array element, don't eliminate stores since it can be easily aliased // with non-constant index. } else if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned() && - !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) { - // Store into a field of a singleton that's not returned. And that value cannot be - // killed due to merge. It's redundant since future loads will get the value - // set by this instruction. - Primitive::Type type = Primitive::kPrimVoid; - if (instruction->IsInstanceFieldSet()) { - type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType(); - } else if (instruction->IsStaticFieldSet()) { - type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType(); - } else { - DCHECK(false) << "Must be an instance/static field set instruction."; - } - if (value->GetType() != type) { - // I/F, J/D aliasing should not happen for fields. - DCHECK(Primitive::IsIntegralType(value->GetType())); - DCHECK(!Primitive::Is64BitType(value->GetType())); - DCHECK(Primitive::IsIntegralType(type)); - DCHECK(!Primitive::Is64BitType(type)); - // Keep the store since the corresponding load isn't eliminated due to different types. - // TODO: handle the different int types so that we can eliminate this store. - redundant_store = false; + ref_info->IsSingletonAndNotReturned()) { + // Store into a field of a singleton that's not returned. 
The value cannot be + // killed due to aliasing/invocation. It can be redundant since future loads can + // directly get the value set by this instruction. The value can still be killed due to + // merging or loop side effects. Stores whose values are killed due to merging/loop side + // effects later will be removed from possibly_removed_stores_ when that is detected. + possibly_redundant = true; + HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance(); + DCHECK(new_instance != nullptr); + if (new_instance->IsFinalizable()) { + // Finalizable objects escape globally. Need to keep the store. + possibly_redundant = false; } else { - redundant_store = true; + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info != nullptr) { + // instruction is a store in the loop so the loop must does write. + DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + + if (loop_info->IsLoopInvariant(original_ref, false)) { + DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); + // Keep the store since its value may be needed at the loop header. + possibly_redundant = false; + } else { + // The singleton is created inside the loop. Value stored to it isn't needed at + // the loop header. This is true for outer loops also. + } + } } - // TODO: eliminate the store if the singleton object is not finalizable. - redundant_store = false; } - if (redundant_store) { - removed_instructions_.push_back(instruction); - substitute_instructions_.push_back(nullptr); - TryRemovingNullCheck(instruction); + if (same_value || possibly_redundant) { + possibly_removed_stores_.push_back(instruction); } - heap_values[idx] = value; + if (!same_value) { + if (possibly_redundant) { + DCHECK(instruction->IsInstanceFieldSet()); + // Put the store as the heap value. If the value is loaded from heap + // by a load later, this store isn't really redundant. + heap_values[idx] = instruction; + } else { + heap_values[idx] = value; + } + } // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { + if (i == idx) { + continue; + } if (heap_values[i] == value) { // Same value should be kept even if aliasing happens. continue; @@ -834,9 +882,10 @@ class LSEVisitor : public HGraphVisitor { return; } if (!heap_location_collector_.MayDeoptimize() && - ref_info->IsSingletonAndNotReturned()) { - // The allocation might be eliminated. - singleton_new_instances_.push_back(new_instance); + ref_info->IsSingletonAndNotReturned() && + !new_instance->IsFinalizable() && + !new_instance->CanThrow()) { + // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination. } ArenaVector<HInstruction*>& heap_values = heap_values_for_[new_instance->GetBlock()->GetBlockId()]; @@ -854,10 +903,10 @@ class LSEVisitor : public HGraphVisitor { // Find an instruction's substitute if it should be removed. // Return the same instruction if it should not be removed. HInstruction* FindSubstitute(HInstruction* instruction) { - size_t size = removed_instructions_.size(); + size_t size = removed_loads_.size(); for (size_t i = 0; i < size; i++) { - if (removed_instructions_[i] == instruction) { - return substitute_instructions_[i]; + if (removed_loads_[i] == instruction) { + return substitute_instructions_for_loads_[i]; } } return instruction; @@ -871,8 +920,13 @@ class LSEVisitor : public HGraphVisitor { // We record the instructions that should be eliminated but may be // used by heap locations. 
They'll be removed in the end. - ArenaVector<HInstruction*> removed_instructions_; - ArenaVector<HInstruction*> substitute_instructions_; + ArenaVector<HInstruction*> removed_loads_; + ArenaVector<HInstruction*> substitute_instructions_for_loads_; + + // Stores in this list may be removed from the list later when it's + // found that the store cannot be eliminated. + ArenaVector<HInstruction*> possibly_removed_stores_; + ArenaVector<HInstruction*> singleton_new_instances_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ebdf7a2f65..1ab206f69e 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -17,6 +17,7 @@ #include "locations.h" #include "nodes.h" +#include "code_generator.h" namespace art { @@ -47,18 +48,26 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { : Location::RequiresRegister(); } -Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) { - if (instruction->IsIntConstant() || instruction->IsNullConstant()) { - return Location::ConstantLocation(instruction->AsConstant()); - } else if (instruction->IsLongConstant()) { - // Does the long constant fit in a 32 bit int? - int64_t value = instruction->AsLongConstant()->GetValue(); - return IsInt<32>(value) - ? Location::ConstantLocation(instruction->AsConstant()) - : Location::RequiresRegister(); - } else { - return Location::RequiresRegister(); +Location Location::RegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } } + return Location::RequiresRegister(); +} + +Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } + } + return Location::RequiresFpuRegister(); } Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { @@ -67,6 +76,12 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { : Location::RegisterLocation(reg); } +Location Location::FpuRegisterOrConstant(HInstruction* instruction) { + return instruction->IsConstant() + ? Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresFpuRegister(); +} + std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); if (location.IsRegister() || location.IsFpuRegister()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index d014379bca..63bbc2cd0a 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -354,8 +354,10 @@ class Location : public ValueObject { } static Location RegisterOrConstant(HInstruction* instruction); - static Location RegisterOrInt32LongConstant(HInstruction* instruction); + static Location RegisterOrInt32Constant(HInstruction* instruction); static Location ByteRegisterOrConstant(int reg, HInstruction* instruction); + static Location FpuRegisterOrConstant(HInstruction* instruction); + static Location FpuRegisterOrInt32Constant(HInstruction* instruction); // The location of the first input to the instruction will be // used to replace this unallocated location. 
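The locations.cc hunk above collapses the old int/long special-casing into a single check: any HConstant whose 64-bit value fits in 32 bits may stay a constant location, otherwise a register is required, and FpuRegisterOrInt32Constant applies the same rule to FPU inputs. A rough standalone sketch of that decision (illustrative types and names, not the ART API):

#include <cstdint>
#include <limits>

enum class LocKind { kConstant, kRequiresRegister };

// Mirrors the RegisterOrInt32Constant() logic above: keep the operand as a
// constant only if it can be encoded as a 32-bit immediate.
// `is_constant` and `value` stand in for HConstant and GetInt64ValueOf().
LocKind RegisterOrInt32ConstantSketch(bool is_constant, int64_t value) {
  if (is_constant &&
      value >= std::numeric_limits<int32_t>::min() &&
      value <= std::numeric_limits<int32_t>::max()) {
    return LocKind::kConstant;
  }
  return LocKind::kRequiresRegister;
}

The FPU variant differs only in its fallback, requiring an FPU register instead of a core register.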
@@ -592,6 +594,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return intrinsified_; } + void SetIntrinsified(bool intrinsified) { + intrinsified_ = intrinsified; + } + private: ArenaVector<Location> inputs_; ArenaVector<Location> temps_; @@ -611,7 +617,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { RegisterSet live_registers_; // Whether these are locations for an intrinsified call. - const bool intrinsified_; + bool intrinsified_; ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 68fb0acf7f..9b26de44fe 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -17,6 +17,7 @@ #include "nodes.h" #include "code_generator.h" +#include "common_dominator.h" #include "ssa_builder.h" #include "base/bit_vector-inl.h" #include "base/bit_utils.h" @@ -179,7 +180,10 @@ void HGraph::ComputeDominanceInformation() { if (successor->GetDominator() == nullptr) { successor->SetDominator(current); } else { - successor->SetDominator(FindCommonDominator(successor->GetDominator(), current)); + // The CommonDominator can work for multiple blocks as long as the + // domination information doesn't change. However, since we're changing + // that information here, we can use the finder only for pairs of blocks. + successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current)); } // Once all the forward edges have been visited, we know the immediate @@ -194,24 +198,6 @@ void HGraph::ComputeDominanceInformation() { } } -HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const { - ArenaBitVector visited(arena_, blocks_.size(), false); - // Walk the dominator tree of the first block and mark the visited blocks. - while (first != nullptr) { - visited.SetBit(first->GetBlockId()); - first = first->GetDominator(); - } - // Walk the dominator tree of the second block until a marked block is found. - while (second != nullptr) { - if (visited.IsBitSet(second->GetBlockId())) { - return second; - } - second = second->GetDominator(); - } - LOG(ERROR) << "Could not find common dominator"; - return nullptr; -} - void HGraph::TransformToSsa() { DCHECK(!reverse_post_order_.empty()); SsaBuilder ssa_builder(this); @@ -335,14 +321,24 @@ void HGraph::SimplifyCatchBlocks() { // instructions into `normal_block` and links the two blocks with a Goto. // Afterwards, incoming normal-flow edges are re-linked to `normal_block`, // leaving `catch_block` with the exceptional edges only. + // // Note that catch blocks with normal-flow predecessors cannot begin with - // a MOVE_EXCEPTION instruction, as guaranteed by the verifier. - DCHECK(!catch_block->GetFirstInstruction()->IsLoadException()); - HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction()); - for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { - if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { - catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); - --j; + // a move-exception instruction, as guaranteed by the verifier. However, + // trivially dead predecessors are ignored by the verifier and such code + // has not been removed at this stage. We therefore ignore the assumption + // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628). 
+ HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException(); + if (normal_block == nullptr) { + // Catch block is either empty or only contains a move-exception. It must + // therefore be dead and will be removed during initial DCE. Do nothing. + DCHECK(!catch_block->EndsWithControlFlowInstruction()); + } else { + // Catch block was split. Re-link normal-flow edges to the new block. + for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); + --j; + } } } } @@ -366,28 +362,45 @@ void HGraph::ComputeTryBlockInformation() { HBasicBlock* first_predecessor = block->GetPredecessors()[0]; DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor)); const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors(); - if (try_entry != nullptr) { + if (try_entry != nullptr && + (block->GetTryCatchInformation() == nullptr || + try_entry != &block->GetTryCatchInformation()->GetTryEntry())) { + // We are either setting try block membership for the first time or it + // has changed. block->SetTryCatchInformation(new (arena_) TryCatchInformation(*try_entry)); } } } void HGraph::SimplifyCFG() { - // Simplify the CFG for future analysis, and code generation: +// Simplify the CFG for future analysis, and code generation: // (1): Split critical edges. - // (2): Simplify loops by having only one back edge, and one preheader. + // (2): Simplify loops by having only one preheader. // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators // can be invalidated. We remember the initial size to avoid iterating over the new blocks. for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) { HBasicBlock* block = blocks_[block_id]; if (block == nullptr) continue; - if (block->NumberOfNormalSuccessors() > 1) { - for (size_t j = 0; j < block->GetSuccessors().size(); ++j) { - HBasicBlock* successor = block->GetSuccessors()[j]; + if (block->GetSuccessors().size() > 1) { + // Only split normal-flow edges. We cannot split exceptional edges as they + // are synthesized (approximate real control flow), and we do not need to + // anyway. Moves that would be inserted there are performed by the runtime. + ArrayRef<HBasicBlock* const> normal_successors = block->GetNormalSuccessors(); + for (size_t j = 0, e = normal_successors.size(); j < e; ++j) { + HBasicBlock* successor = normal_successors[j]; DCHECK(!successor->IsCatchBlock()); - if (successor->GetPredecessors().size() > 1) { + if (successor == exit_block_) { + // Throw->TryBoundary->Exit. Special case which we do not want to split + // because Goto->Exit is not allowed. + DCHECK(block->IsSingleTryBoundary()); + DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()); + } else if (successor->GetPredecessors().size() > 1) { SplitCriticalEdge(block, successor); - --j; + // SplitCriticalEdge could have invalidated the `normal_successors` + // ArrayRef. We must re-acquire it. 
+ normal_successors = block->GetNormalSuccessors(); + DCHECK_EQ(normal_successors[j]->GetSingleSuccessor(), successor); + DCHECK_EQ(e, normal_successors.size()); } } } @@ -1082,6 +1095,8 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const { } else if (GetRight()->IsLongConstant()) { return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); } + } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); } return nullptr; } @@ -1162,8 +1177,61 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } } +void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { + DCHECK(!CanThrow()); + DCHECK(!HasSideEffects()); + DCHECK(!HasEnvironmentUses()); + DCHECK(HasNonEnvironmentUses()); + DCHECK(!IsPhi()); // Makes no sense for Phi. + DCHECK_EQ(InputCount(), 0u); + + // Find the target block. + HUseIterator<HInstruction*> uses_it(GetUses()); + HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock(); + uses_it.Advance(); + while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) { + uses_it.Advance(); + } + if (!uses_it.Done()) { + // This instruction has uses in two or more blocks. Find the common dominator. + CommonDominator finder(target_block); + for (; !uses_it.Done(); uses_it.Advance()) { + finder.Update(uses_it.Current()->GetUser()->GetBlock()); + } + target_block = finder.Get(); + DCHECK(target_block != nullptr); + } + // Move to the first dominator not in a loop. + while (target_block->IsInLoop()) { + target_block = target_block->GetDominator(); + DCHECK(target_block != nullptr); + } + + // Find insertion position. + HInstruction* insert_pos = nullptr; + for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) { + if (uses_it2.Current()->GetUser()->GetBlock() == target_block && + (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) { + insert_pos = uses_it2.Current()->GetUser(); + } + } + if (insert_pos == nullptr) { + // No user in `target_block`, insert before the control flow instruction. + insert_pos = target_block->GetLastInstruction(); + DCHECK(insert_pos->IsControlFlow()); + // Avoid splitting HCondition from HIf to prevent unnecessary materialization. 
+ if (insert_pos->IsIf()) { + HInstruction* if_input = insert_pos->AsIf()->InputAt(0); + if (if_input == insert_pos->GetPrevious()) { + insert_pos = if_input; + } + } + } + MoveBefore(insert_pos); +} + HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), @@ -1193,7 +1261,7 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { } HBasicBlock* HBasicBlock::CreateImmediateDominator() { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); @@ -1209,6 +1277,34 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() { return new_block; } +HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() { + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; + DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only."; + + HInstruction* first_insn = GetFirstInstruction(); + HInstruction* split_before = nullptr; + + if (first_insn != nullptr && first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after + // the StoreLocal and ClearException which must come after the load. + DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + split_before = first_insn->GetNext()->GetNext()->GetNext(); + } else { + // Catch block does not load the exception. Split at the beginning + // to create an empty catch block. + split_before = first_insn; + } + + if (split_before == nullptr) { + // Catch block has no instructions after the split point (must be dead). + // Do not split it but rather signal error by returning nullptr. + return nullptr; + } else { + return SplitBefore(split_before); + } +} + HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); @@ -1293,17 +1389,38 @@ bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +ArrayRef<HBasicBlock* const> HBasicBlock::GetNormalSuccessors() const { + if (EndsWithTryBoundary()) { + // The normal-flow successor of HTryBoundary is always stored at index zero. + DCHECK_EQ(successors_[0], GetLastInstruction()->AsTryBoundary()->GetNormalFlowSuccessor()); + return ArrayRef<HBasicBlock* const>(successors_).SubArray(0u, 1u); + } else { + // All successors of blocks not ending with TryBoundary are normal. + return ArrayRef<HBasicBlock* const>(successors_); + } +} + +ArrayRef<HBasicBlock* const> HBasicBlock::GetExceptionalSuccessors() const { + if (EndsWithTryBoundary()) { + return GetLastInstruction()->AsTryBoundary()->GetExceptionHandlers(); + } else { + // Blocks not ending with TryBoundary do not have exceptional successors. 
+ return ArrayRef<HBasicBlock* const>(); + } +} + bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const { - if (GetBlock()->GetSuccessors().size() != other.GetBlock()->GetSuccessors().size()) { + ArrayRef<HBasicBlock* const> handlers1 = GetExceptionHandlers(); + ArrayRef<HBasicBlock* const> handlers2 = other.GetExceptionHandlers(); + + size_t length = handlers1.size(); + if (length != handlers2.size()) { return false; } // Exception handlers need to be stored in the same order. - for (HExceptionHandlerIterator it1(*this), it2(other); - !it1.Done(); - it1.Advance(), it2.Advance()) { - DCHECK(!it2.Done()); - if (it1.Current() != it2.Current()) { + for (size_t i = 0; i < length; ++i) { + if (handlers1[i] != handlers2[i]) { return false; } } @@ -1356,7 +1473,7 @@ void HBasicBlock::DisconnectAndDelete() { // iteration. DCHECK(dominated_blocks_.empty()); - // Remove the block from all loops it is included in. + // (1) Remove the block from all loops it is included in. for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); @@ -1368,17 +1485,34 @@ void HBasicBlock::DisconnectAndDelete() { } } - // Disconnect the block from its predecessors and update their control-flow - // instructions. + // (2) Disconnect the block from its predecessors and update their + // control-flow instructions. for (HBasicBlock* predecessor : predecessors_) { HInstruction* last_instruction = predecessor->GetLastInstruction(); + if (last_instruction->IsTryBoundary() && !IsCatchBlock()) { + // This block is the only normal-flow successor of the TryBoundary which + // makes `predecessor` dead. Since DCE removes blocks in post order, + // exception handlers of this TryBoundary were already visited and any + // remaining handlers therefore must be live. We remove `predecessor` from + // their list of predecessors. + DCHECK_EQ(last_instruction->AsTryBoundary()->GetNormalFlowSuccessor(), this); + while (predecessor->GetSuccessors().size() > 1) { + HBasicBlock* handler = predecessor->GetSuccessors()[1]; + DCHECK(handler->IsCatchBlock()); + predecessor->RemoveSuccessor(handler); + handler->RemovePredecessor(predecessor); + } + } + predecessor->RemoveSuccessor(this); uint32_t num_pred_successors = predecessor->GetSuccessors().size(); if (num_pred_successors == 1u) { // If we have one successor after removing one, then we must have - // had an HIf or HPackedSwitch, as they have more than one successor. - // Replace those with a HGoto. - DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch()); + // had an HIf, HPackedSwitch or HTryBoundary, as they have more than one + // successor. Replace those with a HGoto. + DCHECK(last_instruction->IsIf() || + last_instruction->IsPackedSwitch() || + (last_instruction->IsTryBoundary() && IsCatchBlock())); predecessor->RemoveInstruction(last_instruction); predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc())); } else if (num_pred_successors == 0u) { @@ -1387,15 +1521,17 @@ void HBasicBlock::DisconnectAndDelete() { // SSAChecker fails unless it is not removed during the pass too. predecessor->RemoveInstruction(last_instruction); } else { - // There are multiple successors left. This must come from a HPackedSwitch - // and we are in the middle of removing the HPackedSwitch. Like above, leave - // this alone, and the SSAChecker will fail if it is not removed as well. 
- DCHECK(last_instruction->IsPackedSwitch()); + // There are multiple successors left. The removed block might be a successor + // of a PackedSwitch which will be completely removed (perhaps replaced with + // a Goto), or we are deleting a catch block from a TryBoundary. In either + // case, leave `last_instruction` as is for now. + DCHECK(last_instruction->IsPackedSwitch() || + (last_instruction->IsTryBoundary() && IsCatchBlock())); } } predecessors_.clear(); - // Disconnect the block from its successors and update their phis. + // (3) Disconnect the block from its successors and update their phis. for (HBasicBlock* successor : successors_) { // Delete this block from the list of predecessors. size_t this_index = successor->GetPredecessorIndexOf(this); @@ -1405,30 +1541,57 @@ void HBasicBlock::DisconnectAndDelete() { // dominator of `successor` which violates the order DCHECKed at the top. DCHECK(!successor->predecessors_.empty()); - // Remove this block's entries in the successor's phis. - if (successor->predecessors_.size() == 1u) { - // The successor has just one predecessor left. Replace phis with the only - // remaining input. - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* phi = phi_it.Current()->AsPhi(); - phi->ReplaceWith(phi->InputAt(1 - this_index)); - successor->RemovePhi(phi); - } - } else { - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + // Remove this block's entries in the successor's phis. Skip exceptional + // successors because catch phi inputs do not correspond to predecessor + // blocks but throwing instructions. Their inputs will be updated in step (4). + if (!successor->IsCatchBlock()) { + if (successor->predecessors_.size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } } } } successors_.clear(); + // (4) Remove instructions and phis. Instructions should have no remaining uses + // except in catch phis. If an instruction is used by a catch phi at `index`, + // remove `index`-th input of all phis in the catch block since they are + // guaranteed dead. Note that we may miss dead inputs this way but the + // graph will always remain consistent. + for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* insn = it.Current(); + while (insn->HasUses()) { + DCHECK(IsTryBlock()); + HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst(); + size_t use_index = use->GetIndex(); + HBasicBlock* user_block = use->GetUser()->GetBlock(); + DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock()); + for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(use_index); + } + } + + RemoveInstruction(insn); + } + for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { + RemovePhi(it.Current()->AsPhi()); + } + // Disconnect from the dominator. dominator_->RemoveDominatedBlock(this); SetDominator(nullptr); - // Delete from the graph. 
The function safely deletes remaining instructions - // and updates the reverse post order. - graph_->DeleteDeadBlock(this); + // Delete from the graph, update reverse post order. + graph_->DeleteDeadEmptyBlock(this); SetGraph(nullptr); } @@ -1475,7 +1638,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { other->predecessors_.clear(); // Delete `other` from the graph. The function updates reverse post order. - graph_->DeleteDeadBlock(other); + graph_->DeleteDeadEmptyBlock(other); other->SetGraph(nullptr); } @@ -1539,19 +1702,14 @@ static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks, std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end()); } -void HGraph::DeleteDeadBlock(HBasicBlock* block) { +void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) { DCHECK_EQ(block->GetGraph(), this); DCHECK(block->GetSuccessors().empty()); DCHECK(block->GetPredecessors().empty()); DCHECK(block->GetDominatedBlocks().empty()); DCHECK(block->GetDominator() == nullptr); - - for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current()); - } - for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi()); - } + DCHECK(block->GetInstructions().IsEmpty()); + DCHECK(block->GetPhis().IsEmpty()); if (block->IsExitBlock()) { exit_block_ = nullptr; @@ -1654,6 +1812,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // (2) the reverse post order of that graph, // (3) the potential loop information they are now in, // (4) try block membership. + // Note that we do not need to update catch phi inputs because they + // correspond to the register file of the outer method which the inlinee + // cannot modify. // We don't add the entry block, the exit block, and the first block, which // has been merged with `at`. @@ -1782,7 +1943,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { * | * if_block * / \ - * dummy_block deopt_block + * true_block false_block * \ / * new_pre_header * | @@ -1790,62 +1951,73 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { */ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); + HBasicBlock* old_pre_header = header->GetDominator(); - // Need this to avoid critical edge. + // Need extra block to avoid critical edge. HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - // Need this to avoid critical edge. 
- HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc()); HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(if_block); - AddBlock(dummy_block); - AddBlock(deopt_block); + AddBlock(true_block); + AddBlock(false_block); AddBlock(new_pre_header); - header->ReplacePredecessor(pre_header, new_pre_header); - pre_header->successors_.clear(); - pre_header->dominated_blocks_.clear(); - - pre_header->AddSuccessor(if_block); - if_block->AddSuccessor(dummy_block); // True successor - if_block->AddSuccessor(deopt_block); // False successor - dummy_block->AddSuccessor(new_pre_header); - deopt_block->AddSuccessor(new_pre_header); - - pre_header->dominated_blocks_.push_back(if_block); - if_block->SetDominator(pre_header); - if_block->dominated_blocks_.push_back(dummy_block); - dummy_block->SetDominator(if_block); - if_block->dominated_blocks_.push_back(deopt_block); - deopt_block->SetDominator(if_block); + header->ReplacePredecessor(old_pre_header, new_pre_header); + old_pre_header->successors_.clear(); + old_pre_header->dominated_blocks_.clear(); + + old_pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(true_block); // True successor + if_block->AddSuccessor(false_block); // False successor + true_block->AddSuccessor(new_pre_header); + false_block->AddSuccessor(new_pre_header); + + old_pre_header->dominated_blocks_.push_back(if_block); + if_block->SetDominator(old_pre_header); + if_block->dominated_blocks_.push_back(true_block); + true_block->SetDominator(if_block); + if_block->dominated_blocks_.push_back(false_block); + false_block->SetDominator(if_block); if_block->dominated_blocks_.push_back(new_pre_header); new_pre_header->SetDominator(if_block); new_pre_header->dominated_blocks_.push_back(header); header->SetDominator(new_pre_header); + // Fix reverse post order. size_t index_of_header = IndexOfElement(reverse_post_order_, header); MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); reverse_post_order_[index_of_header++] = if_block; - reverse_post_order_[index_of_header++] = dummy_block; - reverse_post_order_[index_of_header++] = deopt_block; + reverse_post_order_[index_of_header++] = true_block; + reverse_post_order_[index_of_header++] = false_block; reverse_post_order_[index_of_header++] = new_pre_header; - HLoopInformation* info = pre_header->GetLoopInformation(); - if (info != nullptr) { - if_block->SetLoopInformation(info); - dummy_block->SetLoopInformation(info); - deopt_block->SetLoopInformation(info); - new_pre_header->SetLoopInformation(info); - for (HLoopInformationOutwardIterator loop_it(*pre_header); + // Fix loop information. + HLoopInformation* loop_info = old_pre_header->GetLoopInformation(); + if (loop_info != nullptr) { + if_block->SetLoopInformation(loop_info); + true_block->SetLoopInformation(loop_info); + false_block->SetLoopInformation(loop_info); + new_pre_header->SetLoopInformation(loop_info); + // Add blocks to all enveloping loops. 
+ for (HLoopInformationOutwardIterator loop_it(*old_pre_header); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(if_block); - loop_it.Current()->Add(dummy_block); - loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(true_block); + loop_it.Current()->Add(false_block); loop_it.Current()->Add(new_pre_header); } } + + // Fix try/catch information. + TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock() + ? old_pre_header->GetTryCatchInformation() + : nullptr; + if_block->SetTryCatchInformation(try_catch_info); + true_block->SetTryCatchInformation(try_catch_info); + false_block->SetTryCatchInformation(try_catch_info); + new_pre_header->SetTryCatchInformation(try_catch_info); } void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { @@ -1940,6 +2112,60 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { return !opt.GetDoesNotNeedDexCache(); } +void HInvokeStaticOrDirect::InsertInputAt(size_t index, HInstruction* input) { + inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input)); + input->AddUseAt(this, index); + // Update indexes in use nodes of inputs that have been pushed further back by the insert(). + for (size_t i = index + 1u, size = inputs_.size(); i != size; ++i) { + DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i - 1u); + InputRecordAt(i).GetUseNode()->SetIndex(i); + } +} + +void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.erase(inputs_.begin() + index); + // Update indexes in use nodes of inputs that have been pulled forward by the erase(). + for (size_t i = index, e = InputCount(); i < e; ++i) { + DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u); + InputRecordAt(i).GetUseNode()->SetIndex(i); + } +} + +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + return os << "string_init"; + case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: + return os << "recursive"; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: + return os << "direct"; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: + return os << "direct_fixup"; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: + return os << "dex_cache_pc_relative"; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: + return os << "dex_cache_via_method"; + default: + LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit: + return os << "explicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit: + return os << "implicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone: + return os << "none"; + default: + LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); @@ -1949,4 +2175,46 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.Clear(); } +// Returns an instruction with the opposite boolean value from 'cond'. 
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { + ArenaAllocator* allocator = GetArena(); + + if (cond->IsCondition() && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + // Can't reverse floating point conditions. We have to use HBooleanNot in that case. + HInstruction* lhs = cond->InputAt(0); + HInstruction* rhs = cond->InputAt(1); + HInstruction* replacement = nullptr; + switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* + case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break; + case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break; + case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break; + case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break; + case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break; + case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break; + case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break; + case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break; + case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break; + case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break; + default: + LOG(FATAL) << "Unexpected condition"; + UNREACHABLE(); + } + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } else if (cond->IsIntConstant()) { + HIntConstant* int_const = cond->AsIntConstant(); + if (int_const->IsZero()) { + return GetIntConstant(1); + } else { + DCHECK(int_const->IsOne()); + return GetIntConstant(0); + } + } else { + HInstruction* replacement = new (allocator) HBooleanNot(cond); + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0f2c1cffee..19614f11c6 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -35,6 +35,7 @@ #include "mirror/class.h" #include "offsets.h" #include "primitive.h" +#include "utils/array_ref.h" namespace art { @@ -240,8 +241,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // put deoptimization instructions, etc. void TransformLoopHeaderForBCE(HBasicBlock* header); - // Removes `block` from the graph. - void DeleteDeadBlock(HBasicBlock* block); + // Removes `block` from the graph. Assumes `block` has been disconnected from + // other blocks and has no instructions or phis. + void DeleteDeadEmptyBlock(HBasicBlock* block); // Splits the edge between `block` and `successor` while preserving the // indices in the predecessor/successor lists. If there are multiple edges @@ -350,8 +352,6 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HCurrentMethod* GetCurrentMethod(); - HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; - const DexFile& GetDexFile() const { return dex_file_; } @@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + // Returns an instruction with the opposite boolean value from 'cond'. + // The instruction has been inserted into the graph, either as a constant, or + // before cursor. 
+ HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor); + private: void FindBackEdges(ArenaBitVector* visited); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; @@ -661,6 +666,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { return successors_; } + ArrayRef<HBasicBlock* const> GetNormalSuccessors() const; + ArrayRef<HBasicBlock* const> GetExceptionalSuccessors() const; + bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) { return ContainsElement(successors_, block, start_from); } @@ -811,12 +819,6 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { return GetPredecessorIndexOf(predecessor) == idx; } - // Returns the number of non-exceptional successors. SsaChecker ensures that - // these are stored at the beginning of the successor list. - size_t NumberOfNormalSuccessors() const { - return EndsWithTryBoundary() ? 1 : GetSuccessors().size(); - } - // Create a new block between this block and its predecessors. The new block // is added to the graph, all predecessor edges are relinked to it and an edge // is created to `this`. Returns the new empty block. Reverse post order or @@ -837,6 +839,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // blocks are consistent (for example ending with a control flow instruction). HBasicBlock* SplitAfter(HInstruction* cursor); + // Split catch block into two blocks after the original move-exception bytecode + // instruction, or at the beginning if not present. Returns the newly created, + // latter block, or nullptr if such block could not be created (must be dead + // in that case). Note that this method just updates raw block information, + // like predecessors, successors, dominators, and instruction list. It does not + // update the graph, reverse post order, loop information, nor make sure the + // blocks are consistent (for example ending with a control flow instruction). + HBasicBlock* SplitCatchBlockAfterMoveException(); + // Merge `other` at the end of `this`. Successors and dominated blocks of // `other` are changed to be successors and dominated blocks of `this`. Note // that this method does not update the graph, reverse post order, loop @@ -1084,13 +1095,20 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + M(ArmDexCacheArraysBase, Instruction) +#endif #ifndef ART_ENABLE_CODEGEN_arm64 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) \ + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1430,7 +1448,7 @@ class SideEffects : public ValueObject { return flags_ == (kAllChangeBits | kAllDependOnBits); } - // Returns true if this may read something written by other. + // Returns true if `this` may read something written by `other`. 
bool MayDependOn(SideEffects other) const { const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits; return (other.flags_ & depends_on_flags); @@ -1620,6 +1638,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { return holder_; } + + bool IsFromInlinedInvoke() const { + return GetParent() != nullptr; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. @@ -1725,6 +1748,13 @@ class ReferenceTypeInfo : ValueObject { return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); } + bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); + return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && + GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + // Returns true if the type information provide the same amount of details. // Note that it does not mean that the instructions have the same actual type // (because the type can be the result of a merge). @@ -1927,6 +1957,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Move `this` instruction before `cursor`. void MoveBefore(HInstruction* cursor); + // Move `this` before its first user and out of any loops. If there is no + // out-of-loop user that dominates all other users, move the instruction + // to the end of the out-of-loop common dominator of the user's blocks. + // + // This can be used only on non-throwing instructions with no side effects that + // have at least one use but no environment uses. + void MoveBeforeFirstUserAndOutOfLoops(); + #define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ @@ -2390,6 +2428,10 @@ class HTryBoundary : public HTemplateInstruction<0> { // Returns the block's non-exceptional successor (index zero). HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; } + ArrayRef<HBasicBlock* const> GetExceptionHandlers() const { + return ArrayRef<HBasicBlock* const>(GetBlock()->GetSuccessors()).SubArray(1u); + } + // Returns whether `handler` is among its exception handlers (non-zero index // successors). bool HasExceptionHandler(const HBasicBlock& handler) const { @@ -2417,25 +2459,6 @@ class HTryBoundary : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HTryBoundary); }; -// Iterator over exception handlers of a given HTryBoundary, i.e. over -// exceptional successors of its basic block. -class HExceptionHandlerIterator : public ValueObject { - public: - explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary) - : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {} - - bool Done() const { return index_ == block_.GetSuccessors().size(); } - HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; } - size_t CurrentSuccessorIndex() const { return index_; } - void Advance() { ++index_; } - - private: - const HBasicBlock& block_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator); -}; - // Deoptimize to interpreter, upon checking a condition. 
class HDeoptimize : public HTemplateInstruction<1> { public: @@ -2604,6 +2627,11 @@ class HBinaryOperation : public HExpression<2> { VLOG(compiler) << DebugName() << " is not defined for the (long, int) case."; return nullptr; } + virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, + HNullConstant* y ATTRIBUTE_UNUSED) const { + VLOG(compiler) << DebugName() << " is not defined for the (null, null) case."; + return nullptr; + } // Returns an input that can legally be used as the right input and is // constant, or null. @@ -2694,6 +2722,10 @@ class HEqual : public HCondition { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } + HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, + HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(1); + } DECLARE_INSTRUCTION(Equal); @@ -2726,6 +2758,10 @@ class HNotEqual : public HCondition { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } + HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, + HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant(0); + } DECLARE_INSTRUCTION(NotEqual); @@ -3227,7 +3263,7 @@ class HInvoke : public HInstruction { void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); bool IsFromInlinedInvoke() const { - return GetEnvironment()->GetParent() != nullptr; + return GetEnvironment()->IsFromInlinedInvoke(); } bool CanThrow() const OVERRIDE { return true; } @@ -3395,28 +3431,48 @@ class HInvokeStaticOrDirect : public HInvoke { MethodReference target_method, DispatchInfo dispatch_info, InvokeType original_invoke_type, - InvokeType invoke_type, + InvokeType optimized_invoke_type, ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - // There is one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit, and one other - // if the method is a string factory. - 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) - + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), + // There is potentially one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit, and potentially + // one other if the method is a string factory. + (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) + + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) + + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), return_type, dex_pc, method_index, original_invoke_type), - invoke_type_(invoke_type), + optimized_invoke_type_(optimized_invoke_type), clinit_check_requirement_(clinit_check_requirement), target_method_(target_method), - dispatch_info_(dispatch_info) {} + dispatch_info_(dispatch_info) { } void SetDispatchInfo(const DispatchInfo& dispatch_info) { + bool had_current_method_input = HasCurrentMethodInput(); + bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind); + + // Using the current method is the default and once we find a better + // method load kind, we should not go back to using the current method. 
+ DCHECK(had_current_method_input || !needs_current_method_input); + + if (had_current_method_input && !needs_current_method_input) { + DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetSpecialInputIndex()); + } dispatch_info_ = dispatch_info; } + void AddSpecialInput(HInstruction* input) { + // We allow only one special input. + DCHECK(!IsStringInit() && !HasCurrentMethodInput()); + DCHECK(InputCount() == GetSpecialInputIndex() || + (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck())); + InsertInputAt(GetSpecialInputIndex(), input); + } + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. @@ -3427,19 +3483,42 @@ class HInvokeStaticOrDirect : public HInvoke { return return_type_ == Primitive::kPrimNot && !IsStringInit(); } - InvokeType GetInvokeType() const { return invoke_type_; } + // Get the index of the special input, if any. + // + // If the invoke IsStringInit(), it initially has a HFakeString special argument + // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(), + // the "special input" is the current method pointer; otherwise there may be one + // platform-specific special input, such as PC-relative addressing base. + uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } + + InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; } + void SetOptimizedInvokeType(InvokeType invoke_type) { + optimized_invoke_type_ = invoke_type; + } + MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } - bool HasPcRelDexCache() const { + bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasCurrentMethodInput() const { + // This function can be called only after the invoke has been fully initialized by the builder. + if (NeedsCurrentMethodInput(GetMethodLoadKind())) { + DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod()); + return true; + } else { + DCHECK(InputCount() == GetSpecialInputIndex() || + !InputAt(GetSpecialInputIndex())->IsCurrentMethod()); + return false; + } + } bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } MethodReference GetTargetMethod() const { return target_method_; } + void SetTargetMethod(MethodReference method) { target_method_ = method; } int32_t GetStringInitOffset() const { DCHECK(IsStringInit()); @@ -3452,7 +3531,7 @@ class HInvokeStaticOrDirect : public HInvoke { } uint32_t GetDexCacheArrayOffset() const { - DCHECK(HasPcRelDexCache()); + DCHECK(HasPcRelativeDexCache()); return dispatch_info_.method_load_data; } @@ -3465,29 +3544,28 @@ class HInvokeStaticOrDirect : public HInvoke { // Is this instruction a call to a static method? 
bool IsStatic() const { - return GetInvokeType() == kStatic; + return GetOriginalInvokeType() == kStatic; } - // Remove the art::HLoadClass instruction set as last input by - // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of - // the initial art::HClinitCheck instruction (only relevant for - // static calls with explicit clinit check). - void RemoveLoadClassAsLastInput() { + // Remove the HClinitCheck or the replacement HLoadClass (set as last input by + // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck) + // instruction; only relevant for static calls with explicit clinit check. + void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) { DCHECK(IsStaticWithExplicitClinitCheck()); size_t last_input_index = InputCount() - 1; HInstruction* last_input = InputAt(last_input_index); DCHECK(last_input != nullptr); - DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName(); RemoveAsUserOfInput(last_input_index); inputs_.pop_back(); - clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; - DCHECK(IsStaticWithImplicitClinitCheck()); + clinit_check_requirement_ = new_requirement; + DCHECK(!IsStaticWithExplicitClinitCheck()); } bool IsStringFactoryFor(HFakeString* str) const { if (!IsStringInit()) return false; - // +1 for the current method. - if (InputCount() == (number_of_arguments_ + 1)) return false; + DCHECK(!HasCurrentMethodInput()); + if (InputCount() == (number_of_arguments_)) return false; return InputAt(InputCount() - 1)->AsFakeString() == str; } @@ -3502,7 +3580,7 @@ class HInvokeStaticOrDirect : public HInvoke { } // Is this a call to a static method whose declaring class has an - // explicit intialization check in the graph? + // explicit initialization check in the graph? bool IsStaticWithExplicitClinitCheck() const { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); } @@ -3513,6 +3591,11 @@ class HInvokeStaticOrDirect : public HInvoke { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit); } + // Does this method load kind need the current method as an input? + static bool NeedsCurrentMethodInput(MethodLoadKind kind) { + return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod; + } + DECLARE_INSTRUCTION(InvokeStaticOrDirect); protected: @@ -3530,8 +3613,11 @@ class HInvokeStaticOrDirect : public HInvoke { return input_record; } + void InsertInputAt(size_t index, HInstruction* input); + void RemoveInputAt(size_t index); + private: - const InvokeType invoke_type_; + InvokeType optimized_invoke_type_; ClinitCheckRequirement clinit_check_requirement_; // The target method may refer to different dex file or method index than the original // invoke. 
This happens for sharpened calls and for calls where a method was redeclared @@ -3541,6 +3627,8 @@ class HInvokeStaticOrDirect : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual : public HInvoke { public: @@ -3595,18 +3683,24 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<1> { +class HNewInstance : public HExpression<2> { public: - HNewInstance(HCurrentMethod* current_method, + HNewInstance(HInstruction* cls, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, + bool can_throw, + bool finalizable, QuickEntrypointEnum entrypoint) : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc), type_index_(type_index), dex_file_(dex_file), + can_throw_(can_throw), + finalizable_(finalizable), entrypoint_(entrypoint) { - SetRawInputAt(0, current_method); + SetRawInputAt(0, cls); + SetRawInputAt(1, current_method); } uint16_t GetTypeIndex() const { return type_index_; } @@ -3614,22 +3708,30 @@ class HNewInstance : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } - // It may throw when called on: - // - interfaces - // - abstract/innaccessible/unknown classes - // TODO: optimize when possible. - bool CanThrow() const OVERRIDE { return true; } + + // It may throw when called on type that's not instantiable/accessible. + // It can throw OOME. + // TODO: distinguish between the two cases so we can for example allow allocation elimination. + bool CanThrow() const OVERRIDE { return can_throw_ || true; } + + bool IsFinalizable() const { return finalizable_; } bool CanBeNull() const OVERRIDE { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } + void SetEntrypoint(QuickEntrypointEnum entrypoint) { + entrypoint_ = entrypoint; + } + DECLARE_INSTRUCTION(NewInstance); private: const uint16_t type_index_; const DexFile& dex_file_; - const QuickEntrypointEnum entrypoint_; + const bool can_throw_; + const bool finalizable_; + QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; @@ -4237,9 +4339,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. 
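A small sketch (not from this patch) of how an optimization pass might use the HGraph::InsertOppositeCondition() helper introduced in nodes.cc/nodes.h above. The function InvertBranchCondition is hypothetical; note that a real caller would also have to swap the HIf successors, or otherwise account for the inverted meaning of the branch.

// Hypothetical helper, for illustration only -- not part of this patch.
static void InvertBranchCondition(HGraph* graph, HIf* if_instruction) {
  HInstruction* cond = if_instruction->InputAt(0);
  // The helper returns either an existing constant or a newly created
  // condition/HBooleanNot inserted right before `if_instruction`.
  HInstruction* opposite = graph->InsertOppositeCondition(cond, if_instruction);
  if_instruction->ReplaceInput(opposite, 0);
  // Swapping the true/false successors is left to the caller.
}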
@@ -4710,13 +4816,15 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check) + bool needs_access_check, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), needs_access_check_(needs_access_check), + is_in_dex_cache_(is_in_dex_cache), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. @@ -4741,14 +4849,13 @@ class HLoadClass : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { - // Will call runtime and load the class if the class is not loaded yet. - // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } + void SetMustGenerateClinitCheck(bool generate_clinit_check) { // The entrypoint the code generator is going to call does not do // clinit of the class. @@ -4757,7 +4864,9 @@ class HLoadClass : public HExpression<1> { } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + return MustGenerateClinitCheck() || + (!is_referrers_class_ && !is_in_dex_cache_) || + needs_access_check_; } bool NeedsAccessCheck() const { @@ -4765,8 +4874,6 @@ class HLoadClass : public HExpression<1> { } bool CanThrow() const OVERRIDE { - // May call runtime and and therefore can throw. - // TODO: finer grain decision. return CanCallRuntime(); } @@ -4788,6 +4895,8 @@ class HLoadClass : public HExpression<1> { return SideEffects::CanTriggerGC(); } + bool IsInDexCache() const { return is_in_dex_cache_; } + DECLARE_INSTRUCTION(LoadClass); private: @@ -4797,7 +4906,8 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. bool generate_clinit_check_; - bool needs_access_check_; + const bool needs_access_check_; + const bool is_in_dex_cache_; ReferenceTypeInfo loaded_class_rti_; @@ -4862,6 +4972,7 @@ class HClinitCheck : public HExpression<1> { return true; } + bool CanThrow() const OVERRIDE { return true; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } @@ -5474,6 +5585,9 @@ class HParallelMove : public HTemplateInstruction<0> { } // namespace art +#ifdef ART_ENABLE_CODEGEN_arm +#include "nodes_arm.h" +#endif #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h new file mode 100644 index 0000000000..6a1dbb9e70 --- /dev/null +++ b/compiler/optimizing/nodes_arm.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_ +#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_ + +namespace art { + +class HArmDexCacheArraysBase : public HExpression<0> { + public: + explicit HArmDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + void UpdateElementOffset(size_t element_offset) { + // Use the lowest offset from the requested elements so that all offsets from + // this base are non-negative because our assemblers emit negative-offset loads + // as a sequence of two or more instructions. (However, positive offsets beyond + // 4KiB also require two or more instructions, so this simple heuristic could + // be improved for cases where there is a dense cluster of elements far from + // the lowest offset. This is expected to be rare enough though, so we choose + // not to spend compile time on elaborate calculations.) + element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(ArmDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_ diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc new file mode 100644 index 0000000000..ac2f093847 --- /dev/null +++ b/compiler/optimizing/nodes_arm64.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common_arm64.h" +#include "nodes.h" + +namespace art { + +using arm64::helpers::CanFitInShifterOperand; + +void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { + DCHECK(CanFitInShifterOperand(instruction)); + if (instruction->IsShl()) { + *op_kind = kLSL; + *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsShr()) { + *op_kind = kASR; + *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsUShr()) { + *op_kind = kLSR; + *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); + } else { + DCHECK(instruction->IsTypeConversion()); + Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); + Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = Primitive::ComponentSize(result_type); + int input_size = Primitive::ComponentSize(input_type); + int min_size = std::min(result_size, input_size); + // This follows the logic in + // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. 
+ if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + // There is actually nothing to do. The register will be used as a W + // register, discarding the top bits. This is represented by the default + // encoding 'LSL 0'. + *op_kind = kLSL; + *shift_amount = 0; + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + *op_kind = kUXTH; + } else { + switch (min_size) { + case 1: *op_kind = kSXTB; break; + case 2: *op_kind = kSXTH; break; + case 4: *op_kind = kSXTW; break; + default: + LOG(FATAL) << "Unexpected min size " << min_size; + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { + switch (op) { + case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; + case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; + case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; + case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + default: + LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..e8439354af 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -19,6 +19,79 @@ namespace art { +class HArm64DataProcWithShifterOp : public HExpression<2> { + public: + enum OpKind { + kLSL, // Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. + kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HArm64DataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE { + HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. 
+ static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); + // This instruction computes an intermediate address pointing in the 'middle' of an object. The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. @@ -42,6 +115,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. + InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 05c6b2ca57..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -23,6 +23,9 @@ #include "optimizing/code_generator.h" #include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" +#include "utils/arm/assembler_thumb2.h" +#include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -36,52 +39,62 @@ class OptimizingCFITest : public CFITest { // Enable this flag to generate the expected outputs. static constexpr bool kGenerateExpected = false; - void TestImpl(InstructionSet isa, const char* isa_str, - const std::vector<uint8_t>& expected_asm, - const std::vector<uint8_t>& expected_cfi) { + OptimizingCFITest() + : pool_(), + allocator_(&pool_), + opts_(), + isa_features_(), + graph_(nullptr), + code_gen_(), + blocks_(allocator_.Adapter()) {} + + void SetUpFrame(InstructionSet isa) { // Setup simple context. 
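
The rewritten fixture splits the old monolithic TestImpl() into SetUpFrame(), Finish() and Check(), so that the new per-ISA "Adjust" tests can emit extra instructions between frame entry and frame exit. A rough sketch of that flow with toy stand-ins for every type — nothing below is the real ART test code, and the byte values are arbitrary markers:

#include <cstdint>
#include <vector>

struct FakeFixture {
  std::vector<uint8_t> code;
  void SetUpFrame() { code.push_back(0xE1); }            // Emit the frame entry.
  void EmitBody(uint8_t insn) { code.push_back(insn); }  // Extra instructions (Adjust tests only).
  void Finish() { code.push_back(0xE2); }                // Emit the frame exit and finalize.
  bool Check(const std::vector<uint8_t>& expected) const { return code == expected; }
};

int main() {
  FakeFixture plain;    // TEST_ISA path: frame entry + frame exit only.
  plain.SetUpFrame();
  plain.Finish();

  FakeFixture adjust;   // *Adjust path: extra code emitted between entry and exit.
  adjust.SetUpFrame();
  adjust.EmitBody(0x90);
  adjust.Finish();

  return plain.Check({0xE1, 0xE2}) && adjust.Check({0xE1, 0x90, 0xE2}) ? 0 : 1;
}
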
- ArenaPool pool; - ArenaAllocator allocator(&pool); - CompilerOptions opts; - std::unique_ptr<const InstructionSetFeatures> isa_features; std::string error; - isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph* graph = CreateGraph(&allocator); + isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + graph_ = CreateGraph(&allocator_); // Generate simple frame with some spills. - std::unique_ptr<CodeGenerator> code_gen( - CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); + code_gen_.reset(CodeGenerator::Create(graph_, isa, *isa_features_, opts_)); + code_gen_->GetAssembler()->cfi().SetEnabled(true); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; for (int i = 0; i < 2; i++) { // Two registers of each kind. for (; core_reg < 32; core_reg++) { - if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + if (code_gen_->IsCoreCalleeSaveRegister(core_reg)) { auto location = Location::RegisterLocation(core_reg); - code_gen->AddAllocatedRegister(location); + code_gen_->AddAllocatedRegister(location); core_reg++; break; } } for (; fp_reg < 32; fp_reg++) { - if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + if (code_gen_->IsFloatingPointCalleeSaveRegister(fp_reg)) { auto location = Location::FpuRegisterLocation(fp_reg); - code_gen->AddAllocatedRegister(location); + code_gen_->AddAllocatedRegister(location); fp_reg++; break; } } } - ArenaVector<HBasicBlock*> blocks(allocator.Adapter()); - code_gen->block_order_ = &blocks; - code_gen->ComputeSpillMask(); - code_gen->SetFrameSize(frame_size); - code_gen->GenerateFrameEntry(); - code_gen->GenerateFrameExit(); + code_gen_->block_order_ = &blocks_; + code_gen_->ComputeSpillMask(); + code_gen_->SetFrameSize(frame_size); + code_gen_->GenerateFrameEntry(); + } + + void Finish() { + code_gen_->GenerateFrameExit(); + code_gen_->Finalize(&code_allocator_); + } + + void Check(InstructionSet isa, + const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
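
Check() follows the usual golden-output pattern: compare the emitted bytes against the stored expected arrays, or, with kGenerateExpected flipped on, produce new expected outputs instead. A simplified, self-contained version of that shape — every name below is a stand-in:

#include <cstdio>
#include <vector>

static constexpr bool kGenerateExpected = false;  // Flip to print new golden data.

bool CheckOrGenerate(const std::vector<unsigned char>& actual,
                     const std::vector<unsigned char>& expected) {
  if (kGenerateExpected) {
    for (unsigned char b : actual) {
      std::printf("0x%02X, ", static_cast<unsigned>(b));  // Paste back into the expected array.
    }
    std::printf("\n");
    return true;
  }
  return actual == expected;
}

int main() {
  std::vector<unsigned char> actual = {0xE4, 0xFF};
  std::vector<unsigned char> expected = {0xE4, 0xFF};
  return CheckOrGenerate(actual, expected) ? 0 : 1;
}
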
- InternalCodeAllocator code_allocator; - code_gen->Finalize(&code_allocator); - const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory(); - Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory(); + Assembler* opt_asm = code_gen_->GetAssembler(); const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); if (kGenerateExpected) { @@ -92,6 +105,19 @@ class OptimizingCFITest : public CFITest { } } + void TestImpl(InstructionSet isa, const char* + isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + SetUpFrame(isa); + Finish(); + Check(isa, isa_str, expected_asm, expected_cfi); + } + + CodeGenerator* GetCodeGenerator() { + return code_gen_.get(); + } + private: class InternalCodeAllocator : public CodeAllocator { public: @@ -109,21 +135,111 @@ class OptimizingCFITest : public CFITest { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; + + ArenaPool pool_; + ArenaAllocator allocator_; + CompilerOptions opts_; + std::unique_ptr<const InstructionSetFeatures> isa_features_; + HGraph* graph_; + std::unique_ptr<CodeGenerator> code_gen_; + ArenaVector<HBasicBlock*> blocks_; + InternalCodeAllocator code_allocator_; }; -#define TEST_ISA(isa) \ - TEST_F(OptimizingCFITest, isa) { \ - std::vector<uint8_t> expected_asm(expected_asm_##isa, \ - expected_asm_##isa + arraysize(expected_asm_##isa)); \ - std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ - expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ - TestImpl(isa, #isa, expected_asm, expected_cfi); \ +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm( \ + expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi( \ + expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ } TEST_ISA(kThumb2) TEST_ISA(kArm64) TEST_ISA(kX86) TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +TEST_F(OptimizingCFITest, kThumb2Adjust) { + std::vector<uint8_t> expected_asm( + expected_asm_kThumb2_adjust, + expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kThumb2_adjust, + expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust)); + SetUpFrame(kThumb2); +#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())-> + Label target; + __ CompareAndBranchIfZero(arm::R0, &target); + // Push the target out of range of CBZ. + for (size_t i = 0; i != 65; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); +} + +TEST_F(OptimizingCFITest, kMipsAdjust) { + // One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. 
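
A back-of-the-envelope check of the size comment above, assuming the usual MIPS encoding where a conditional branch holds a signed 16-bit offset counted in 4-byte instruction words:

#include <cstdio>

int main() {
  const long kNumNops = 1L + (1L << 15);                  // Delay-slot NOP plus the filler NOPs.
  const long kFillerBytes = kNumNops * 4L;                // Each NOP is 4 bytes: 131076 bytes.
  const long kMaxForwardBranch = ((1L << 15) - 1) * 4L;   // 131068 bytes reachable forward.
  std::printf("filler = %ld bytes, max reach = %ld bytes, out of range: %s\n",
              kFillerBytes, kMaxForwardBranch,
              kFillerBytes > kMaxForwardBranch ? "yes" : "no");
  return 0;
}
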
+ static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips_adjust_head, + expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips_adjust_tail, + expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips_adjust, + expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust)); + SetUpFrame(kMips); +#define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())-> + mips::MipsLabel target; + __ Beqz(mips::A0, &target); + // Push the target out of range of BEQZ. + for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips, "kMips_adjust", expected_asm, expected_cfi); +} + +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. 
+ for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} #endif // __ANDROID__ diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 2c2c55f295..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -138,3 +138,332 @@ static constexpr uint8_t expected_cfi_kX86_64[] = { // 0x0000002c: ret // 0x0000002d: .cfi_restore_state // 0x0000002d: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF, + 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7, + 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, + 0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, + 0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, + 0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, + 0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, + 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -28 +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: sw r31, +24(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r17, +20(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-8 +// 0x0000000c: sw r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-12 +// 0x00000010: swc1 f22, +8(r29) +// 0x00000014: swc1 f23, +12(r29) +// 0x00000018: swc1 f20, +0(r29) +// 0x0000001c: swc1 f21, +4(r29) +// 0x00000020: addiu r29, r29, -36 +// 0x00000024: .cfi_def_cfa_offset: 64 +// 0x00000024: sw r4, +0(r29) +// 0x00000028: .cfi_remember_state +// 0x00000028: addiu r29, r29, 36 +// 0x0000002c: .cfi_def_cfa_offset: 28 +// 0x0000002c: lwc1 f20, +0(r29) +// 0x00000030: lwc1 f21, +4(r29) +// 0x00000034: lwc1 f22, +8(r29) +// 0x00000038: lwc1 f23, +12(r29) +// 0x0000003c: lw r16, +16(r29) +// 0x00000040: .cfi_restore: r16 +// 0x00000040: lw r17, +20(r29) +// 0x00000044: .cfi_restore: r17 +// 0x00000044: lw r31, +24(r29) +// 0x00000048: .cfi_restore: r31 +// 0x00000048: addiu r29, r29, 28 +// 0x0000004c: .cfi_def_cfa_offset: 0 +// 0x0000004c: jr r31 +// 0x00000050: nop +// 0x00000054: .cfi_restore_state +// 0x00000054: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67, + 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF, + 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1, + 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 
0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: .cfi_remember_state +// 0x00000020: daddiu r29, r29, 24 +// 0x00000024: .cfi_def_cfa_offset: 40 +// 0x00000024: ldc1 f24, +0(r29) +// 0x00000028: ldc1 f25, +8(r29) +// 0x0000002c: ld r16, +16(r29) +// 0x00000030: .cfi_restore: r16 +// 0x00000030: ld r17, +24(r29) +// 0x00000034: .cfi_restore: r17 +// 0x00000034: ld r31, +32(r29) +// 0x00000038: .cfi_restore: r31 +// 0x00000038: daddiu r29, r29, 40 +// 0x0000003c: .cfi_def_cfa_offset: 0 +// 0x0000003c: jr r31 +// 0x00000040: nop +// 0x00000044: .cfi_restore_state +// 0x00000044: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kThumb2_adjust[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28, + 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, + 0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A, + 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, + 0x0E, 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: cmp r0, #0 +// 0x0000000c: beq +128 (0x00000090) +// 0x0000000e: ldr r0, [r0, #0] +// 0x00000010: ldr r0, [r0, #0] +// 0x00000012: ldr r0, [r0, #0] +// 0x00000014: ldr r0, [r0, #0] +// 0x00000016: ldr r0, [r0, #0] +// 0x00000018: ldr r0, [r0, #0] +// 0x0000001a: ldr r0, [r0, #0] +// 0x0000001c: ldr r0, [r0, #0] +// 0x0000001e: ldr r0, [r0, #0] +// 0x00000020: ldr r0, [r0, #0] +// 0x00000022: ldr r0, [r0, #0] +// 0x00000024: ldr r0, [r0, #0] +// 0x00000026: ldr r0, [r0, #0] +// 0x00000028: ldr r0, [r0, #0] +// 0x0000002a: ldr r0, [r0, #0] +// 0x0000002c: ldr r0, [r0, #0] +// 0x0000002e: ldr r0, [r0, #0] +// 0x00000030: ldr r0, [r0, #0] +// 0x00000032: ldr r0, [r0, #0] +// 0x00000034: ldr r0, [r0, #0] +// 0x00000036: ldr r0, [r0, #0] +// 0x00000038: ldr r0, [r0, #0] +// 0x0000003a: ldr r0, [r0, #0] +// 0x0000003c: ldr r0, [r0, #0] +// 
0x0000003e: ldr r0, [r0, #0] +// 0x00000040: ldr r0, [r0, #0] +// 0x00000042: ldr r0, [r0, #0] +// 0x00000044: ldr r0, [r0, #0] +// 0x00000046: ldr r0, [r0, #0] +// 0x00000048: ldr r0, [r0, #0] +// 0x0000004a: ldr r0, [r0, #0] +// 0x0000004c: ldr r0, [r0, #0] +// 0x0000004e: ldr r0, [r0, #0] +// 0x00000050: ldr r0, [r0, #0] +// 0x00000052: ldr r0, [r0, #0] +// 0x00000054: ldr r0, [r0, #0] +// 0x00000056: ldr r0, [r0, #0] +// 0x00000058: ldr r0, [r0, #0] +// 0x0000005a: ldr r0, [r0, #0] +// 0x0000005c: ldr r0, [r0, #0] +// 0x0000005e: ldr r0, [r0, #0] +// 0x00000060: ldr r0, [r0, #0] +// 0x00000062: ldr r0, [r0, #0] +// 0x00000064: ldr r0, [r0, #0] +// 0x00000066: ldr r0, [r0, #0] +// 0x00000068: ldr r0, [r0, #0] +// 0x0000006a: ldr r0, [r0, #0] +// 0x0000006c: ldr r0, [r0, #0] +// 0x0000006e: ldr r0, [r0, #0] +// 0x00000070: ldr r0, [r0, #0] +// 0x00000072: ldr r0, [r0, #0] +// 0x00000074: ldr r0, [r0, #0] +// 0x00000076: ldr r0, [r0, #0] +// 0x00000078: ldr r0, [r0, #0] +// 0x0000007a: ldr r0, [r0, #0] +// 0x0000007c: ldr r0, [r0, #0] +// 0x0000007e: ldr r0, [r0, #0] +// 0x00000080: ldr r0, [r0, #0] +// 0x00000082: ldr r0, [r0, #0] +// 0x00000084: ldr r0, [r0, #0] +// 0x00000086: ldr r0, [r0, #0] +// 0x00000088: ldr r0, [r0, #0] +// 0x0000008a: ldr r0, [r0, #0] +// 0x0000008c: ldr r0, [r0, #0] +// 0x0000008e: ldr r0, [r0, #0] +// 0x00000090: .cfi_remember_state +// 0x00000090: add sp, sp, #44 +// 0x00000092: .cfi_def_cfa_offset: 20 +// 0x00000092: vpop.f32 {s16-s17} +// 0x00000096: .cfi_def_cfa_offset: 12 +// 0x00000096: .cfi_restore_extended: r80 +// 0x00000096: .cfi_restore_extended: r81 +// 0x00000096: pop {r5, r6, pc} +// 0x00000098: .cfi_restore_state +// 0x00000098: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips_adjust_head[] = { + 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF, + 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7, + 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, + 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, + 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, +}; +static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { + 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7, + 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F, + 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips_adjust[] = { + 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, + 0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, + 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, 0x44, 0xDF, + 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -28 +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: sw r31, +24(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r17, +20(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-8 +// 0x0000000c: sw r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-12 +// 0x00000010: swc1 f22, +8(r29) +// 0x00000014: swc1 f23, +12(r29) +// 0x00000018: swc1 f20, +0(r29) +// 0x0000001c: swc1 f21, +4(r29) +// 0x00000020: addiu r29, r29, -36 +// 0x00000024: .cfi_def_cfa_offset: 64 +// 0x00000024: sw r4, +0(r29) +// 0x00000028: bne r0, r4, 0x0000004c ; +36 +// 
0x0000002c: addiu r29, r29, -4 +// 0x00000030: .cfi_def_cfa_offset: 68 +// 0x00000030: sw r31, +0(r29) +// 0x00000034: bltzal r0, 0x00000038 ; +4 +// 0x00000038: lui r1, 0x20000 +// 0x0000003c: ori r1, r1, 24 +// 0x00000040: addu r1, r1, r31 +// 0x00000044: lw r31, +0(r29) +// 0x00000048: jr r1 +// 0x0000004c: addiu r29, r29, 4 +// 0x00000050: .cfi_def_cfa_offset: 64 +// 0x00000050: nop +// ... +// 0x00020050: nop +// 0x00020054: .cfi_remember_state +// 0x00020054: addiu r29, r29, 36 +// 0x00020058: .cfi_def_cfa_offset: 28 +// 0x00020058: lwc1 f20, +0(r29) +// 0x0002005c: lwc1 f21, +4(r29) +// 0x00020060: lwc1 f22, +8(r29) +// 0x00020064: lwc1 f23, +12(r29) +// 0x00020068: lw r16, +16(r29) +// 0x0002006c: .cfi_restore: r16 +// 0x0002006c: lw r17, +20(r29) +// 0x00020070: .cfi_restore: r17 +// 0x00020070: lw r31, +24(r29) +// 0x00020074: .cfi_restore: r31 +// 0x00020074: addiu r29, r29, 28 +// 0x00020078: .cfi_def_cfa_offset: 0 +// 0x00020078: jr r31 +// 0x0002007c: nop +// 0x00020080: .cfi_restore_state +// 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... 
+// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8cb2cfc816..cae2d3f01b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -20,11 +20,15 @@ #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 +#include "dex_cache_array_fixups_arm.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif #ifdef ART_ENABLE_CODEGEN_x86 -#include "constant_area_fixups_x86.h" +#include "pc_relative_fixups_x86.h" #endif #include "art_method-inl.h" @@ -56,6 +60,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" #include "load_store_elimination.h" @@ -109,24 +114,23 @@ class PassScope; class PassObserver : public ValueObject { public: PassObserver(HGraph* graph, - const char* method_name, CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : graph_(graph), - method_name_(method_name), + cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), - timing_logger_(method_name, true, true), + timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, method_name)) { + if (!IsVerboseMethod(compiler_driver, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); + visualizer_.PrintHeader(GetMethodName()); codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -134,7 +138,7 @@ class PassObserver : public ValueObject { ~PassObserver() { if (timing_logger_enabled_) { - LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } } @@ -147,6 +151,14 @@ class PassObserver : public ValueObject { void SetGraphInBadState() { graph_in_bad_state_ = true; } + const char* GetMethodName() { + // PrettyMethod() is expensive, so we delay calling it until we actually have to. + if (cached_method_name_.empty()) { + cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + } + return cached_method_name_.c_str(); + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. 
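
GetMethodName() above trades an eager PrettyMethod() call for a lazily filled cache. A compact stand-alone model of that pattern — ExpensivePrettyPrint and NameCache are invented names:

#include <string>

static std::string ExpensivePrettyPrint(int method_idx) {  // Stand-in for PrettyMethod().
  return "void Foo.bar() [" + std::to_string(method_idx) + "]";
}

class NameCache {
 public:
  explicit NameCache(int method_idx) : method_idx_(method_idx) {}

  const char* GetMethodName() {
    if (cached_name_.empty()) {
      cached_name_ = ExpensivePrettyPrint(method_idx_);  // Computed at most once, on demand.
    }
    return cached_name_.c_str();
  }

 private:
  int method_idx_;
  std::string cached_name_;
};

int main() {
  NameCache cache(42);
  return cache.GetMethodName()[0] == 'v' ? 0 : 1;
}
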
@@ -205,7 +217,8 @@ class PassObserver : public ValueObject { } HGraph* const graph_; - const char* method_name_; + + std::string cached_method_name_; bool timing_logger_enabled_; TimingLogger timing_logger_; @@ -258,15 +271,6 @@ class OptimizingCompiler FINAL : public Compiler { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const OVERRIDE; - CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const; - CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) const OVERRIDE { @@ -291,23 +295,45 @@ class OptimizingCompiler FINAL : public Compiler { } } + bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method) + OVERRIDE + SHARED_REQUIRES(Locks::mutator_lock_); + private: // Whether we should run any optimization or register allocation. If false, will // just run the code generation after the graph was built. const bool run_optimizations_; - // Optimize and compile `graph`. - CompiledMethod* CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; - - // Just compile without doing optimizations. - CompiledMethod* CompileBaseline(CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; + // Create a 'CompiledMethod' for an optimized graph. + CompiledMethod* EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Create a 'CompiledMethod' for a non-optimized graph. + CompiledMethod* EmitBaseline(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Try compiling a method and return the code generator used for + // compiling it. + // This method: + // 1) Builds the graph. Returns null if it failed to build it. + // 2) If `run_optimizations_` is set: + // 2.1) Transform the graph to SSA. Returns null if it failed. + // 2.2) Run optimizations on the graph, including register allocator. + // 3) Generate code with the `code_allocator` provided. + CodeGenerator* TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -369,6 +395,15 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } +// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment. 
+// TODO: Add support for other architectures and remove this function +static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { + return instruction_set == kArm64 + || instruction_set == kThumb2 + || instruction_set == kX86 + || instruction_set == kX86_64; +} + static void RunOptimizations(HOptimization* optimizations[], size_t length, PassObserver* pass_observer) { @@ -391,20 +426,9 @@ static void MaybeRunInliner(HGraph* graph, if (!should_inline) { return; } - - ArenaAllocator* arena = graph->GetArena(); - HInliner* inliner = new (arena) HInliner( + HInliner* inliner = new (graph->GetArena()) HInliner( graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); - ReferenceTypePropagation* type_propagation = - new (arena) ReferenceTypePropagation(graph, handles, - "reference_type_propagation_after_inlining"); - - HOptimization* optimizations[] = { - inliner, - // Run another type propagation phase: inlining will open up more opportunities - // to remove checkcast/instanceof and null checks. - type_propagation, - }; + HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); } @@ -415,6 +439,17 @@ static void RunArchOptimizations(InstructionSet instruction_set, PassObserver* pass_observer) { ArenaAllocator* arena = graph->GetArena(); switch (instruction_set) { +#ifdef ART_ENABLE_CODEGEN_arm + case kThumb2: + case kArm: { + arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); + HOptimization* arm_optimizations[] = { + fixups + }; + RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { arm64::InstructionSimplifierArm64* simplifier = @@ -432,10 +467,9 @@ static void RunArchOptimizations(InstructionSet instruction_set, #endif #ifdef ART_ENABLE_CODEGEN_x86 case kX86: { - x86::ConstantAreaFixups* constant_area_fixups = - new (arena) x86::ConstantAreaFixups(graph, stats); + x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, stats); HOptimization* x86_optimizations[] = { - constant_area_fixups + pc_relative_fixups }; RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer); break; @@ -446,13 +480,32 @@ static void RunArchOptimizations(InstructionSet instruction_set, } } +NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. 
+static void AllocateRegisters(HGraph* graph, + CodeGenerator* codegen, + PassObserver* pass_observer) { + PrepareForRegisterAllocation(graph).Run(); + SsaLivenessAnalysis liveness(graph, codegen); + { + PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); + liveness.Analyze(); + } + { + PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); + } +} + static void RunOptimizations(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - StackHandleScopeCollection* handles) { + PassObserver* pass_observer) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + ArenaAllocator* arena = graph->GetArena(); HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); @@ -462,14 +515,15 @@ static void RunOptimizations(HGraph* graph, InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce"); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); LICM* licm = new (arena) LICM(graph, *side_effects); LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); - BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); ReferenceTypePropagation* type_propagation = - new (arena) ReferenceTypePropagation(graph, handles); + new (arena) ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_types"); @@ -492,12 +546,13 @@ static void RunOptimizations(HGraph* graph, RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); // TODO: Update passes incompatible with try/catch so we have the same // pipeline for all methods. if (graph->HasTryCatch()) { HOptimization* optimizations2[] = { + boolean_simplify, side_effects, gvn, dce2, @@ -519,6 +574,7 @@ static void RunOptimizations(HGraph* graph, licm, induction, bce, + fold3, // evaluates code generated by dynamic bce simplify3, lse, dce2, @@ -532,6 +588,7 @@ static void RunOptimizations(HGraph* graph, } RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer); + AllocateRegisters(graph, codegen, pass_observer); } // The stack map we generate must be 4-byte aligned on ARM. 
Since existing @@ -545,22 +602,6 @@ static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } -NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. -static void AllocateRegisters(HGraph* graph, - CodeGenerator* codegen, - PassObserver* pass_observer) { - PrepareForRegisterAllocation(graph).Run(); - SsaLivenessAnalysis liveness(graph, codegen); - { - PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); - liveness.Analyze(); - } - { - PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); - } -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -574,74 +615,40 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - soa.Self()->TransitionFromRunnableToSuspended(kNative); - RunOptimizations(graph, - codegen, - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - pass_observer, - &handles); - - AllocateRegisters(graph, codegen, pass_observer); - - ArenaAllocator* arena = graph->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileOptimized(&allocator); - +CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); - codegen->BuildStackMaps(&stack_map); - - MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); + stack_map.resize(codegen->ComputeStackMapsSize()); + codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), ArrayRef<const uint8_t>(), // mapping_table. ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(), // native_gc_map. 
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); - soa.Self()->TransitionFromSuspendedToRunnable(); return compiled_method; } -CompiledMethod* OptimizingCompiler::CompileBaseline( +CompiledMethod* OptimizingCompiler::EmitBaseline( + ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ArenaAllocator* arena = codegen->GetGraph()->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileBaseline(&allocator); - + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); @@ -649,44 +656,43 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); codegen->BuildVMapTable(&vmap_table); ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); + codegen->BuildNativeGCMap(&gc_map, *compiler_driver); - MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); return compiled_method; } -CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const { - std::string method_name = PrettyMethod(method_idx, dex_file); +CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const { MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); - // Always use the thumb2 assembler: some runtime functionality (like implicit stack - // overflow checks) assume thumb2. + // Always use the Thumb-2 assembler: some runtime functionality + // (like implicit stack overflow checks) assume Thumb-2. 
if (instruction_set == kArm) { instruction_set = kThumb2; } @@ -697,6 +703,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite return nullptr; } + // When read barriers are enabled, do not attempt to compile for + // instruction sets that have no read barrier support. + if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) { + return nullptr; + } + if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); return nullptr; @@ -721,13 +733,10 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite && compiler_driver->RequiresConstructorBarrier(Thread::Current(), dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetClassDefIndex()); - ArenaAllocator arena(Runtime::Current()->GetArenaPool()); - HGraph* graph = new (&arena) HGraph( - &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), + HGraph* graph = new (arena) HGraph( + arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable()); - bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_; - std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, instruction_set, @@ -741,7 +750,6 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()); PassObserver pass_observer(graph, - method_name.c_str(), codegen.get(), visualizer_output_.get(), compiler_driver); @@ -769,7 +777,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite interpreter_metadata, dex_cache); - VLOG(compiler) << "Building " << method_name; + VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); @@ -779,58 +787,40 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } } - bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); - - // `run_optimizations_` is set explicitly (either through a compiler filter - // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back - // to Quick. - bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit(); - CompiledMethod* compiled_method = nullptr; - if (run_optimizations_ && can_allocate_registers) { - VLOG(compiler) << "Optimizing " << method_name; - + VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); + if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. 
- LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; + LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() + << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); pass_observer.SetGraphInBadState(); return nullptr; } } - compiled_method = CompileOptimized(graph, - codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); - } else if (shouldOptimize && can_allocate_registers) { - LOG(FATAL) << "Could not allocate registers in optimizing compiler"; - UNREACHABLE(); - } else if (can_use_baseline) { - VLOG(compiler) << "Compile baseline " << method_name; - - if (!run_optimizations_) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled); - } else if (!can_allocate_registers) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); - } - - compiled_method = CompileBaseline(codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer); + codegen->CompileOptimized(code_allocator); + } else { + codegen->CompileBaseline(code_allocator); } + pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { - if (arena.BytesAllocated() > 4 * MB) { - MemStats mem_stats(arena.GetMemStats()); + if (arena->BytesAllocated() > 4 * MB) { + MemStats mem_stats(arena->GetMemStats()); LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); } } - return compiled_method; + return codegen.release(); } static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) { @@ -852,33 +842,50 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, Handle<mirror::DexCache> dex_cache) const { CompilerDriver* compiler_driver = GetCompilerDriver(); CompiledMethod* method = nullptr; - if (Runtime::Current()->IsAotCompiler()) { - const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); - DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) - || CanHandleVerificationFailure(verified_method)) { - method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, - method_idx, jclass_loader, dex_file, dex_cache); - } else { - if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + DCHECK(Runtime::Current()->IsAotCompiler()); + const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); + DCHECK(!verified_method->HasRuntimeThrow()); + if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) + || CanHandleVerificationFailure(verified_method)) { + ArenaAllocator arena(Runtime::Current()->GetArenaPool()); + CodeVectorAllocator code_allocator(&arena); + std::unique_ptr<CodeGenerator> codegen( + TryCompile(&arena, + &code_allocator, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + dex_file, + dex_cache)); + if (codegen.get() != nullptr) { + MaybeRecordStat(MethodCompilationStat::kCompiled); + if (run_optimizations_) { + method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver); } else { - MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + method = EmitBaseline(&arena, &code_allocator, 
codegen.get(), compiler_driver); } } } else { - // This is for the JIT compiler, which has already ensured the class is verified. - // We can go straight to compiling. - DCHECK(Runtime::Current()->UseJit()); - method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, - method_idx, jclass_loader, dex_file, dex_cache); + if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + } else { + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError); + } } if (kIsDebugBuild && IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { - // For testing purposes, we put a special marker on method names that should be compiled - // with this compiler. This makes sure we're not regressing. + IsInstructionSetSupported(compiler_driver->GetInstructionSet()) && + (!kEmitCompilerReadBarrier || + InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) { + // For testing purposes, we put a special marker on method names + // that should be compiled with this compiler (when the the + // instruction set is supported -- and has support for read + // barriers, if they are enabled). This makes sure we're not + // regressing. std::string method_name = PrettyMethod(method_idx, dex_file); bool shouldCompile = method_name.find("$opt$") != std::string::npos; DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; @@ -896,4 +903,70 @@ bool IsCompilingWithCoreImage() { return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art"); } +bool OptimizingCompiler::JitCompile(Thread* self, + jit::JitCodeCache* code_cache, + ArtMethod* method) { + StackHandleScope<2> hs(self); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + method->GetDeclaringClass()->GetClassLoader())); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); + + jobject jclass_loader = class_loader.ToJObject(); + const DexFile* dex_file = method->GetDexFile(); + const uint16_t class_def_idx = method->GetClassDefIndex(); + const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + const uint32_t method_idx = method->GetDexMethodIndex(); + const uint32_t access_flags = method->GetAccessFlags(); + const InvokeType invoke_type = method->GetInvokeType(); + + ArenaAllocator arena(Runtime::Current()->GetArenaPool()); + CodeVectorAllocator code_allocator(&arena); + std::unique_ptr<CodeGenerator> codegen; + { + // Go to native so that we don't block GC during compilation. + ScopedThreadSuspension sts(self, kNative); + + DCHECK(run_optimizations_); + codegen.reset( + TryCompile(&arena, + &code_allocator, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + *dex_file, + dex_cache)); + if (codegen.get() == nullptr) { + return false; + } + } + + size_t stack_map_size = codegen->ComputeStackMapsSize(); + uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size); + if (stack_map_data == nullptr) { + return false; + } + codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)); + const void* code = code_cache->CommitCode( + self, + method, + nullptr, + stack_map_data, + nullptr, + codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + code_allocator.GetMemory().data(), + code_allocator.GetSize()); + + if (code == nullptr) { + code_cache->ClearData(self, stack_map_data); + return false; + } + + return true; +} + } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 6375cf1a56..e5ea0f576b 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ -#include <sstream> +#include <iomanip> #include <string> #include <type_traits> @@ -27,18 +27,18 @@ namespace art { enum MethodCompilationStat { kAttemptCompilation = 0, - kCompiledBaseline, - kCompiledOptimized, + kCompiled, kInlinedInvoke, kInstructionSimplifications, kInstructionSimplificationsArch, kUnresolvedMethod, kUnresolvedField, kUnresolvedFieldNotAFastAccess, + kRemovedCheckedCast, + kRemovedDeadInstruction, + kRemovedNullCheck, kNotCompiledBranchOutsideMethodCode, kNotCompiledCannotBuildSSA, - kNotCompiledCantAccesType, - kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, @@ -47,13 +47,8 @@ enum MethodCompilationStat { kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, kNotCompiledUnsupportedIsa, + kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, - kNotOptimizedDisabled, - kNotOptimizedRegisterAllocator, - kNotOptimizedTryCatch, - kRemovedCheckedCast, - kRemovedDeadInstruction, - kRemovedNullCheck, kLastStat }; @@ -66,20 +61,19 @@ class OptimizingCompilerStats { } void Log() const { + if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { + // Log only in debug builds or if the compiler is verbose. 
+ return; + } + if (compile_stats_[kAttemptCompilation] == 0) { LOG(INFO) << "Did not compile any method."; } else { - size_t unoptimized_percent = - compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation]; - size_t optimized_percent = - compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation]; - std::ostringstream oss; - oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: "; - - oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, "; - oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, "; - - LOG(INFO) << oss.str(); + float compiled_percent = + compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation]; + LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation] + << " methods: " << std::fixed << std::setprecision(2) + << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled."; for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { @@ -92,41 +86,38 @@ class OptimizingCompilerStats { private: std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { + std::string name; switch (stat) { - case kAttemptCompilation : return "kAttemptCompilation"; - case kCompiledBaseline : return "kCompiledBaseline"; - case kCompiledOptimized : return "kCompiledOptimized"; - case kInlinedInvoke : return "kInlinedInvoke"; - case kInstructionSimplifications: return "kInstructionSimplifications"; - case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch"; - case kUnresolvedMethod : return "kUnresolvedMethod"; - case kUnresolvedField : return "kUnresolvedField"; - case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess"; - case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; - case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; - case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode"; - case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledPathological : return "kNotCompiledPathological"; - case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kRemovedCheckedCast: return "kRemovedCheckedCast"; - case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; - case kRemovedNullCheck: return "kRemovedNullCheck"; - - case kLastStat: break; // Invalid to print out. 
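
For the Log() change above, which now reports a single "compiled" percentage with two decimals, a tiny stand-alone illustration of the formatting — the counts are made up:

#include <iomanip>
#include <iostream>

int main() {
  const int attempted = 120;  // Stand-ins for compile_stats_[kAttemptCompilation] / [kCompiled].
  const int compiled = 97;
  const double percent = compiled * 100.0 / attempted;
  std::cout << "Attempted compilation of " << attempted
            << " methods: " << std::fixed << std::setprecision(2)
            << percent << "% (" << compiled << ") compiled." << std::endl;
  return 0;
}
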
+ case kAttemptCompilation : name = "AttemptCompilation"; break; + case kCompiled : name = "Compiled"; break; + case kInlinedInvoke : name = "InlinedInvoke"; break; + case kInstructionSimplifications: name = "InstructionSimplifications"; break; + case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break; + case kUnresolvedMethod : name = "UnresolvedMethod"; break; + case kUnresolvedField : name = "UnresolvedField"; break; + case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break; + case kRemovedCheckedCast: name = "RemovedCheckedCast"; break; + case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break; + case kRemovedNullCheck: name = "RemovedNullCheck"; break; + case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break; + case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break; + case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break; + case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break; + case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break; + case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break; + case kNotCompiledPathological : name = "NotCompiledPathological"; break; + case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break; + case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break; + case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break; + case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break; + case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break; + + case kLastStat: + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } - LOG(FATAL) << "invalid stat " - << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); - UNREACHABLE(); + return "OptStat#" + name; } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index c3470002c5..b383f1e1ad 100644 --- a/compiler/optimizing/constant_area_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "constant_area_fixups_x86.h" +#include "pc_relative_fixups_x86.h" namespace art { namespace x86 { @@ -22,9 +22,18 @@ namespace x86 { /** * Finds instructions that need the constant area base as an input. */ -class ConstantHandlerVisitor : public HGraphVisitor { +class PCRelativeHandlerVisitor : public HGraphVisitor { public: - explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } private: void VisitAdd(HAdd* add) OVERRIDE { @@ -72,7 +81,7 @@ class ConstantHandlerVisitor : public HGraphVisitor { void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. 
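In the renamed x86 fixups pass, the PC-relative base is created lazily at most once, parked at the start of the entry block, and only relocated next to its first user once all uses are known (MoveBaseIfNeeded). A minimal standalone sketch of that idiom, using hypothetical Instruction/PcRelativeSketch types rather than the real HGraph classes:

#include <cassert>
#include <iostream>
#include <list>
#include <string>

// Hypothetical minimal model, not the real HGraph API: the entry block is just
// a list of named instructions.
struct Instruction { std::string name; };

class PcRelativeSketch {
 public:
  explicit PcRelativeSketch(std::list<Instruction>* entry_block) : entry_(entry_block) {}

  // Mirrors InitializePCRelativeBasePointer(): idempotent, inserts at the start
  // of the entry block.
  Instruction* EnsureBase() {
    if (base_ != nullptr) {
      return base_;  // already initialized once
    }
    entry_->push_front(Instruction{"compute_base"});
    base_ = &entry_->front();  // std::list keeps element addresses stable
    return base_;
  }

  // Mirrors MoveBaseIfNeeded(): only does work if some user required the base.
  void MoveBaseIfNeeded() const {
    if (base_ == nullptr) {
      return;
    }
    std::cout << "would move '" << base_->name << "' before its first user\n";
  }

 private:
  std::list<Instruction>* entry_;
  Instruction* base_ = nullptr;
};

int main() {
  std::list<Instruction> entry{{"parameter"}, {"goto"}};
  PcRelativeSketch pass(&entry);
  assert(pass.EnsureBase() == pass.EnsureBase());  // the base is created exactly once
  pass.MoveBaseIfNeeded();
  return 0;
}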
- InitializeConstantAreaPointer(switch_insn); + InitializePCRelativeBasePointer(); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( @@ -84,31 +93,38 @@ class ConstantHandlerVisitor : public HGraphVisitor { block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); } - void InitializeConstantAreaPointer(HInstruction* user) { + void InitializePCRelativeBasePointer() { // Ensure we only initialize the pointer once. if (base_ != nullptr) { return; } - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); - HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base_, insert_pos); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); DCHECK(base_ != nullptr); } void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { - InitializeConstantAreaPointer(insn); - HGraph* graph = GetGraph(); - HBasicBlock* block = insn->GetBlock(); + InitializePCRelativeBasePointer(); HX86LoadFromConstantTable* load_constant = - new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); - block->InsertInstructionBefore(load_constant, insn); + new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); + insn->GetBlock()->InsertInstructionBefore(load_constant, insn); insn->ReplaceInput(load_constant, input_index); } void HandleInvoke(HInvoke* invoke) { + // If this is an invoke-static/-direct with PC-relative dex cache array + // addressing, we need the PC-relative address base. + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { + InitializePCRelativeBasePointer(); + // Add the extra parameter base_. + DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); + invoke_static_or_direct->AddSpecialInput(base_); + } // Ensure that we can load FP arguments from the constant area. for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { HConstant* input = invoke->InputAt(i)->AsConstant(); @@ -123,9 +139,10 @@ class ConstantHandlerVisitor : public HGraphVisitor { HX86ComputeBaseMethodAddress* base_; }; -void ConstantAreaFixups::Run() { - ConstantHandlerVisitor visitor(graph_); +void PcRelativeFixups::Run() { + PCRelativeHandlerVisitor visitor(graph_); visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); } } // namespace x86 diff --git a/compiler/optimizing/constant_area_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h index 4138039cdd..af708acb03 100644 --- a/compiler/optimizing/constant_area_fixups_x86.h +++ b/compiler/optimizing/pc_relative_fixups_x86.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_ -#define ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_ +#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_ +#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_ #include "nodes.h" #include "optimization.h" @@ -23,10 +23,10 @@ namespace art { namespace x86 { -class ConstantAreaFixups : public HOptimization { +class PcRelativeFixups : public HOptimization { public: - ConstantAreaFixups(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "constant_area_fixups_x86", stats) {} + PcRelativeFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "pc_relative_fixups_x86", stats) {} void Run() OVERRIDE; }; @@ -34,4 +34,4 @@ class ConstantAreaFixups : public HOptimization { } // namespace x86 } // namespace art -#endif // ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_ +#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_ diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index ca928ae0f2..d1770b75ab 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,16 +48,85 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - HLoadClass* cls = check->GetLoadClass(); - check->ReplaceWith(cls); - if (check->GetPrevious() == cls) { + // Try to find a static invoke or a new-instance from which this check originated. + HInstruction* implicit_clinit = nullptr; + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) && + CanMoveClinitCheck(check, user)) { + implicit_clinit = user; + if (user->IsInvokeStaticOrDirect()) { + DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()); + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + } else { + DCHECK(user->IsNewInstance()); + // We delegate the initialization duty to the allocation. + if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) { + user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved); + } + } + break; + } + } + // If we found a static invoke or new-instance for merging, remove the check + // from dominated static invokes. + if (implicit_clinit != nullptr) { + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { + HInstruction* user = it.Current()->GetUser(); + // All other uses must be dominated. + DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user)); + it.Advance(); // Advance before we remove the node, reference to the next node is preserved. + if (user->IsInvokeStaticOrDirect()) { + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + } + } + } + + HLoadClass* load_class = check->GetLoadClass(); + bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check); + + check->ReplaceWith(load_class); + + if (implicit_clinit != nullptr) { + // Remove the check from the graph. It has been merged into the invoke or new-instance. + check->GetBlock()->RemoveInstruction(check); + // Check if we can merge the load class as well. 
+ if (can_merge_with_load_class && !load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } else if (can_merge_with_load_class) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. - cls->SetMustGenerateClinitCheck(true); + load_class->SetMustGenerateClinitCheck(true); check->GetBlock()->RemoveInstruction(check); } } +void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { + HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); + bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + // Change the entrypoint to kQuickAllocObject if either: + // - the class is finalizable (only kQuickAllocObject handles finalizable classes), + // - the class needs access checks (we do not know if it's finalizable), + // - or the load class has only one use. + if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObject); + instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We can remove the load class from the graph. If it needed access checks, we delegate + // the access check to the allocation. + if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); + } + } +} + void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { @@ -86,30 +155,60 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // Remove a load class instruction as last input of a static - // invoke, which has been added (along with a clinit check, - // removed by PrepareForRegisterAllocation::VisitClinitCheck - // previously) by the graph builder during the creation of the - // static invoke instruction, but is no longer required at this - // stage (i.e., after inlining has been performed). - invoke->RemoveLoadClassAsLastInput(); - - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { - last_input->SetMustGenerateClinitCheck(false); - - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses()) { - last_input->GetBlock()->RemoveInstruction(last_input); - } + // Detach the explicit class initialization check from the invoke. + // Keeping track of the initializing instruction is no longer required + // at this stage (i.e., after inlining has been performed). 
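Merging the clinit check into a static invoke or a new-instance, as above, is only legal when both instructions originate from the same dex instruction; CanMoveClinitCheck() establishes this by comparing the two environment chains frame by frame (the real check also compares the dex files). A standalone sketch of that comparison, with a hypothetical Frame/EnvironmentChain model instead of HEnvironment:

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical model: one Frame per inlining level, innermost frame first.
struct Frame { uint32_t dex_pc; uint32_t method_idx; };
using EnvironmentChain = std::vector<Frame>;

bool SameInlinedPosition(const EnvironmentChain& user, const EnvironmentChain& input) {
  if (user.size() != input.size()) {
    // Different chain lengths: one side was reached through an extra inlined
    // call, so the two instructions cannot come from the same dex instruction.
    return false;
  }
  for (size_t i = 0; i < user.size(); ++i) {
    if (user[i].dex_pc != input[i].dex_pc || user[i].method_idx != input[i].method_idx) {
      return false;
    }
  }
  return true;
}

int main() {
  EnvironmentChain a{{10, 3}, {42, 7}};
  EnvironmentChain b{{10, 3}, {42, 7}};
  EnvironmentChain c{{10, 3}};
  std::cout << SameInlinedPosition(a, b) << " " << SameInlinedPosition(a, c) << "\n";  // 1 0
  return 0;
}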
+ invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + + // Merging with load class should have happened in VisitClinitCheck(). + DCHECK(!CanMoveClinitCheck(last_input, invoke)); + } +} + +bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) { + // Determine if input and user come from the same dex instruction, so that we can move + // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) + // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). + + // Start with a quick dex pc check. + if (user->GetDexPc() != input->GetDexPc()) { + return false; + } + + // Now do a thorough environment check that this is really coming from the same instruction in + // the same inlined graph. Unfortunately, we have to go through the whole environment chain. + HEnvironment* user_environment = user->GetEnvironment(); + HEnvironment* input_environment = input->GetEnvironment(); + while (user_environment != nullptr || input_environment != nullptr) { + if (user_environment == nullptr || input_environment == nullptr) { + // Different environment chain length. This happens when a method is called + // once directly and once indirectly through another inlined method. + return false; + } + if (user_environment->GetDexPc() != input_environment->GetDexPc() || + user_environment->GetMethodIdx() != input_environment->GetMethodIdx() || + !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) { + return false; + } + user_environment = user_environment->GetParent(); + input_environment = input_environment->GetParent(); + } + + // Check for code motion taking the input to a different block. + if (user->GetBlock() != input->GetBlock()) { + return false; + } + + // In debug mode, check that we have not inserted a throwing instruction + // or an instruction with side effects between input and user. + if (kIsDebugBuild) { + for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { + CHECK(between != nullptr); // User must be after input in the same block. 
+ CHECK(!between->CanThrow()); + CHECK(!between->HasSideEffects()); } } + return true; } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index d7f277fa0d..9b2434250d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,9 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitNewInstance(HNewInstance* instruction) OVERRIDE; + + bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 659da068a9..dd349240f7 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -99,17 +99,9 @@ ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, } } -void ReferenceTypePropagation::Run() { - // To properly propagate type info we need to visit in the dominator-based order. - // Reverse post order guarantees a node's dominators are visited first. - // We take advantage of this order in `VisitBasicBlock`. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - ProcessWorklist(); - +void ReferenceTypePropagation::ValidateTypes() { + // TODO: move this to the graph checker. if (kIsDebugBuild) { - // TODO: move this to the graph checker. ScopedObjectAccess soa(Thread::Current()); for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); @@ -135,6 +127,100 @@ void ReferenceTypePropagation::Run() { } } +static void CheckHasNoTypedInputs(HInstruction* root_instr) { + ArenaAllocatorAdapter<void> adapter = + root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation); + + ArenaVector<HPhi*> visited_phis(adapter); + ArenaVector<HInstruction*> worklist(adapter); + worklist.push_back(root_instr); + + while (!worklist.empty()) { + HInstruction* instr = worklist.back(); + worklist.pop_back(); + + if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) { + // Expect that both `root_instr` and its inputs have invalid RTI. + ScopedObjectAccess soa(Thread::Current()); + DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI."; + + // Insert all unvisited inputs to the worklist. 
+ for (HInputIterator it(instr); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi()) { + if (ContainsElement(visited_phis, input->AsPhi())) { + continue; + } else { + visited_phis.push_back(input->AsPhi()); + } + } + worklist.push_back(input); + } + } else if (instr->IsNullConstant()) { + // The only input of `root_instr` allowed to have valid RTI because it is ignored. + } else { + LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI " + << instr->GetReferenceTypeInfo(); + UNREACHABLE(); + } + } +} + +template<typename Functor> +static void ForEachUntypedInstruction(HGraph* graph, Functor fn) { + ScopedObjectAccess soa(Thread::Current()); + for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) { + for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + } +} + +void ReferenceTypePropagation::SetUntypedInstructionsToObject() { + // In some cases, the fix-point iteration will leave kPrimNot instructions with + // invalid RTI because bytecode does not provide enough typing information. + // Set the RTI of such instructions to Object. + // Example: + // MyClass a = null, b = null; + // while (a == null) { + // if (cond) { a = b; } else { b = a; } + // } + + if (kIsDebugBuild) { + // Test that if we are going to set RTI from invalid to Object, that + // instruction did not have any typed instructions in its def-use chain + // and therefore its type could not be inferred. + ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); }); + } + + ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false); + ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) { + instr->SetReferenceTypeInfo(obj_rti); + }); +} + +void ReferenceTypePropagation::Run() { + // To properly propagate type info we need to visit in the dominator-based order. + // Reverse post order guarantees a node's dominators are visited first. + // We take advantage of this order in `VisitBasicBlock`. + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + + ProcessWorklist(); + SetUntypedInstructionsToObject(); + ValidateTypes(); +} + void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { RTPVisitor visitor(graph_, handles_, @@ -530,8 +616,9 @@ void RTPVisitor::VisitLoadException(HLoadException* instr) { void RTPVisitor::VisitNullCheck(HNullCheck* instr) { ScopedObjectAccess soa(Thread::Current()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - DCHECK(parent_rti.IsValid()); - instr->SetReferenceTypeInfo(parent_rti); + if (parent_rti.IsValid()) { + instr->SetReferenceTypeInfo(parent_rti); + } } void RTPVisitor::VisitFakeString(HFakeString* instr) { @@ -584,11 +671,16 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { } if (phi->GetBlock()->IsLoopHeader()) { + ScopedObjectAccess soa(Thread::Current()); // Set the initial type for the phi. Use the non back edge input for reaching // a fixed point faster. 
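The def-use walk in CheckHasNoTypedInputs() earlier in this hunk is a plain worklist traversal that records visited phis so that cyclic phi inputs, as created by loops, terminate the walk. A standalone sketch of the pattern with a hypothetical Node type (here the root is pre-marked as visited for simplicity):

#include <algorithm>
#include <iostream>
#include <vector>

// Hypothetical graph node: only phis can form cycles in this model.
struct Node {
  bool is_phi = false;
  std::vector<Node*> inputs;
};

int ProcessChain(Node* root) {
  std::vector<Node*> visited_phis;
  std::vector<Node*> worklist{root};
  if (root->is_phi) {
    visited_phis.push_back(root);
  }
  int processed = 0;
  while (!worklist.empty()) {
    Node* node = worklist.back();
    worklist.pop_back();
    ++processed;
    for (Node* input : node->inputs) {
      if (input->is_phi) {
        if (std::find(visited_phis.begin(), visited_phis.end(), input) != visited_phis.end()) {
          continue;  // this phi was already queued; skipping it guarantees termination
        }
        visited_phis.push_back(input);
      }
      worklist.push_back(input);
    }
  }
  return processed;
}

int main() {
  Node a{true}, b{true};
  a.inputs = {&b};
  b.inputs = {&a};  // phi cycle, as at a loop header
  std::cout << ProcessChain(&a) << "\n";  // 2: the cycle does not loop forever
  return 0;
}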
+ HInstruction* first_input = phi->InputAt(0); + ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo(); + if (first_input_rti.IsValid() && !first_input->IsNullConstant()) { + phi->SetCanBeNull(first_input->CanBeNull()); + phi->SetReferenceTypeInfo(first_input_rti); + } AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); } else { // Eagerly compute the type of the phi, for quicker convergence. Note // that we don't need to add users to the worklist because we are @@ -610,23 +702,36 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& } bool is_exact = a.IsExact() && b.IsExact(); - Handle<mirror::Class> type_handle; + ReferenceTypeInfo::TypeHandle result_type_handle; + ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle(); + ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle(); + bool a_is_interface = a_type_handle->IsInterface(); + bool b_is_interface = b_type_handle->IsInterface(); if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { - type_handle = a.GetTypeHandle(); + result_type_handle = a_type_handle; } else if (a.IsSupertypeOf(b)) { - type_handle = a.GetTypeHandle(); + result_type_handle = a_type_handle; is_exact = false; } else if (b.IsSupertypeOf(a)) { - type_handle = b.GetTypeHandle(); + result_type_handle = b_type_handle; + is_exact = false; + } else if (!a_is_interface && !b_is_interface) { + result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); is_exact = false; } else { - // TODO: Find the first common super class. - type_handle = object_class_handle_; + // This can happen if: + // - both types are interfaces. TODO(calin): implement + // - one is an interface, the other a class, and the type does not implement the interface + // e.g: + // void foo(Interface i, boolean cond) { + // Object o = cond ? i : new Object(); + // } + result_type_handle = object_class_handle_; is_exact = false; } - return ReferenceTypeInfo::Create(type_handle, is_exact); + return ReferenceTypeInfo::Create(result_type_handle, is_exact); } static void UpdateArrayGet(HArrayGet* instr, @@ -636,7 +741,9 @@ static void UpdateArrayGet(HArrayGet* instr, DCHECK_EQ(Primitive::kPrimNot, instr->GetType()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - DCHECK(parent_rti.IsValid()); + if (!parent_rti.IsValid()) { + return; + } Handle<mirror::Class> handle = parent_rti.GetTypeHandle(); if (handle->IsObjectArrayClass()) { @@ -648,8 +755,6 @@ static void UpdateArrayGet(HArrayGet* instr, instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false)); } - - return; } bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { @@ -666,7 +771,7 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { instr->SetReferenceTypeInfo(parent_rti); } } else if (instr->IsArrayGet()) { - // TODO: consider if it's worth "looking back" and bounding the input object + // TODO: consider if it's worth "looking back" and binding the input object // to an array type. 
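The new merge rule above picks the first common superclass when neither type is a supertype of the other, and still falls back to Object when an interface is involved. A standalone sketch of the common-superclass walk on a hypothetical single-inheritance Type model (interfaces are not modelled here):

#include <iostream>
#include <string>
#include <vector>

// Hypothetical single-inheritance class model; `super` is null only for Object.
struct Type { std::string name; const Type* super; };

const Type* CommonSuperClass(const Type* lhs, const Type* rhs) {
  std::vector<const Type*> lhs_chain;
  for (const Type* t = lhs; t != nullptr; t = t->super) {
    lhs_chain.push_back(t);  // lhs and all of its superclasses
  }
  for (const Type* t = rhs; t != nullptr; t = t->super) {
    for (const Type* ancestor : lhs_chain) {
      if (ancestor == t) {
        return t;  // first (most specific) type that is an ancestor of both
      }
    }
  }
  return nullptr;  // unreachable when both chains end at the same root
}

int main() {
  Type object{"Object", nullptr};
  Type a{"A", &object};
  Type b{"B", &a};
  Type c{"C", &a};
  std::cout << CommonSuperClass(&b, &c)->name << "\n";       // A
  std::cout << CommonSuperClass(&b, &object)->name << "\n";  // Object
  return 0;
}

As in the diff, such a merged type is never exact unless both inputs are the very same type, which is why every fallback path above also clears is_exact.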
UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_); } else { @@ -694,6 +799,7 @@ void RTPVisitor::VisitArrayGet(HArrayGet* instr) { if (instr->GetType() != Primitive::kPrimNot) { return; } + ScopedObjectAccess soa(Thread::Current()); UpdateArrayGet(instr, handles_, object_class_handle_); if (!instr->GetReferenceTypeInfo().IsValid()) { @@ -715,14 +821,35 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { instr->SetReferenceTypeInfo(new_rti); } +// NullConstant inputs are ignored during merging as they do not provide any useful information. +// If all the inputs are NullConstants then the type of the phi will be set to Object. void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { - ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + size_t input_count = instr->InputCount(); + size_t first_input_index_not_null = 0; + while (first_input_index_not_null < input_count && + instr->InputAt(first_input_index_not_null)->IsNullConstant()) { + first_input_index_not_null++; + } + if (first_input_index_not_null == input_count) { + // All inputs are NullConstants, set the type to object. + // This may happen in the presence of inlining. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); + return; + } + + ReferenceTypeInfo new_rti = instr->InputAt(first_input_index_not_null)->GetReferenceTypeInfo(); + if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) { // Early return if we are Object and inexact. instr->SetReferenceTypeInfo(new_rti); return; } - for (size_t i = 1; i < instr->InputCount(); i++) { + + for (size_t i = first_input_index_not_null + 1; i < input_count; i++) { + if (instr->InputAt(i)->IsNullConstant()) { + continue; + } new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); if (new_rti.IsValid() && new_rti.IsObjectClass()) { if (!new_rti.IsExact()) { @@ -732,7 +859,10 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { } } } - instr->SetReferenceTypeInfo(new_rti); + + if (new_rti.IsValid()) { + instr->SetReferenceTypeInfo(new_rti); + } } // Re-computes and updates the nullability of the instruction. Returns whether or diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 5493601adc..21789e1331 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -56,6 +56,9 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) SHARED_REQUIRES(Locks::mutator_lock_); + void ValidateTypes(); + void SetUntypedInstructionsToObject(); + StackHandleScopeCollection* handles_; ArenaVector<HInstruction*> worklist_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index ef22c816a0..d399bc2d7a 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1525,7 +1525,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, DCHECK(IsValidDestination(destination)) << destination; if (source.Equals(destination)) return; - DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u); + DCHECK_EQ(block->GetNormalSuccessors().size(), 1u); HInstruction* last = block->GetLastInstruction(); // We insert moves at exit for phi predecessors and connecting blocks. 
// A block ending with an if or a packed switch cannot branch to a block @@ -1752,7 +1752,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, // If `from` has only one successor, we can put the moves at the exit of it. Otherwise // we need to put the moves at the entry of `to`. - if (from->NumberOfNormalSuccessors() == 1) { + if (from->GetNormalSuccessors().size() == 1) { InsertParallelMoveAtExitOf(from, interval->GetParent()->GetDefinedBy(), source->ToLocation(), @@ -1894,7 +1894,7 @@ void RegisterAllocator::Resolve() { HInstruction* phi = inst_it.Current(); for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) { HBasicBlock* predecessor = current->GetPredecessors()[i]; - DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u); + DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); HInstruction* input = phi->InputAt(i); Location source = input->GetLiveInterval()->GetLocationAt( predecessor->GetLifetimeEnd() - 1); diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index a128079cdb..5e1d1d9954 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -49,7 +49,8 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { } // TODO: Avoid CompilerDriver. - InvokeType invoke_type = invoke->GetOriginalInvokeType(); + InvokeType original_invoke_type = invoke->GetOriginalInvokeType(); + InvokeType optimized_invoke_type = original_invoke_type; MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex()); int vtable_idx; uintptr_t direct_code, direct_method; @@ -58,15 +59,18 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { invoke->GetDexPc(), false /* update_stats: already updated in builder */, true /* enable_devirtualization */, - &invoke_type, + &optimized_invoke_type, &target_method, &vtable_idx, &direct_code, &direct_method); - DCHECK(success); - DCHECK_EQ(invoke_type, invoke->GetInvokeType()); - DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file); - DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index); + if (!success) { + // TODO: try using kDexCachePcRelative. 
It's always a valid method load + // kind as long as it's supported by the codegen + return; + } + invoke->SetOptimizedInvokeType(optimized_invoke_type); + invoke->SetTargetMethod(target_method); HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index ec45d6b2ca..9bbc354290 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -129,13 +129,13 @@ TEST(SideEffectsTest, NoDependences) { TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, true); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, true); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true); SideEffects any_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, false); + SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -151,15 +151,15 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypes) { // Type I/F. testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), - SideEffects::FieldReadOfType(Primitive::kPrimFloat, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimInt), SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); // Type L/D. 
testWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimLong, false), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false), + SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(Primitive::kPrimLong), SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); @@ -171,9 +171,9 @@ TEST(SideEffectsTest, AllWritesAndReads) { for (Primitive::Type type = Primitive::kPrimNot; type < Primitive::kPrimVoid; type = Primitive::Type(type + 1)) { - s = s.Union(SideEffects::FieldWriteOfType(type, false)); + s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); - s = s.Union(SideEffects::FieldReadOfType(type, false)); + s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(type)); } EXPECT_TRUE(s.DoesAllReadWrite()); @@ -225,10 +225,10 @@ TEST(SideEffectsTest, BitStrings) { "||DJ|||||", // note: DJ alias SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false)); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); - s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false)); + s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); EXPECT_STREQ( diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 4565590bc3..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. +static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. 
- if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. + DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
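SetLoopHeaderPhiInputs() above adds one input per predecessor and marks the phi dead as soon as its vreg turns out to be undefined in one of them, leaving it with fewer inputs than predecessors for SsaPhiElimination to remove. A standalone sketch with a hypothetical Block/Phi model:

#include <iostream>
#include <map>
#include <vector>

// Hypothetical model: a block is just a map from vreg number to the value
// (an integer id here) that the vreg holds at the end of the block.
using Block = std::map<int, int>;

struct Phi {
  std::vector<int> inputs;
  bool dead = false;
};

void SetLoopHeaderPhiInputs(Phi* phi, int vreg, const std::vector<Block>& predecessors) {
  for (const Block& predecessor : predecessors) {
    auto it = predecessor.find(vreg);
    if (it == predecessor.end()) {
      phi->dead = true;  // vreg undefined in this predecessor: mark dead, keep fewer inputs
      return;
    }
    phi->inputs.push_back(it->second);
  }
}

int main() {
  Phi phi;
  // vreg 0 is defined in the first predecessor only.
  std::vector<Block> predecessors = {{{0, 10}}, {{1, 20}}};
  SetLoopHeaderPhiInputs(&phi, /* vreg */ 0, predecessors);
  std::cout << "dead=" << phi.dead << " inputs=" << phi.inputs.size() << "\n";  // dead=1 inputs=1
  return 0;
}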
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. + DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
+ (*current_locals_)[reg_number + 1] = nullptr; + } + store->GetBlock()->RemoveInstruction(store); } @@ -660,8 +727,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { if (instruction->CanThrowIntoCatchBlock()) { const HTryBoundary& try_entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry(); - for (HExceptionHandlerIterator it(try_entry); !it.Done(); it.Advance()) { - HBasicBlock* catch_block = it.Current(); + for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) { ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block); DCHECK_EQ(handler_locals->size(), current_locals_->size()); for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 79f1a28ac8..dcce5e4c2c 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + void SetLoopHeaderPhiInputs(); void FixNullConstantType(); void EquivalentPhisCleanup(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 72f9ddd506..a3219dcc38 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,6 +16,8 @@ #include "ssa_phi_elimination.h" +#include "base/arena_containers.h" + namespace art { void SsaDeadPhiElimination::Run() { @@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Phis are constructed live and should not be revived if previously marked + // dead. This algorithm temporarily breaks that invariant but we DCHECK that + // only phis which were initially live are revived. + ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter()); + // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); - // Set dead ahead of running through uses. The phi may have no use. - phi->SetDead(); + if (phi->IsDead()) { + continue; + } + + bool has_non_phi_use = false; for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* current = use_it.Current(); - HInstruction* user = current->GetUser(); - if (!user->IsPhi()) { - worklist_.push_back(phi); - phi->SetLive(); + if (!use_it.Current()->GetUser()->IsPhi()) { + has_non_phi_use = true; break; } } + + if (has_non_phi_use) { + worklist_.push_back(phi); + } else { + phi->SetDead(); + if (kIsDebugBuild) { + initially_live.insert(phi); + } + } } } @@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist_.back(); worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.push_back(input->AsPhi()); - input->AsPhi()->SetLive(); + HPhi* input = it.Current()->AsPhi(); + if (input != nullptr && input->IsDead()) { + // Input is a dead phi. Revive it and add to the worklist. We make sure + // that the phi was not dead initially (see definition of `initially_live`). 
+ DCHECK(ContainsElement(initially_live, input)); + input->SetLive(); + worklist_.push_back(input); } } } @@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() { } if (phi->InputCount() == 0) { - DCHECK(phi->IsCatchPhi()); DCHECK(phi->IsDead()); continue; }
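The marking scheme above seeds the worklist with phis that have at least one non-phi user and then propagates liveness backwards through phi inputs; only phis reached this way survive. A standalone sketch with a hypothetical Phi type (liveness defaults to dead here, whereas the real pass constructs phis live and uses the initially_live set purely for a debug check):

#include <iostream>
#include <string>
#include <vector>

// Hypothetical phi node; `has_non_phi_use` stands for a use by a real instruction.
struct Phi {
  std::string name;
  bool has_non_phi_use = false;
  std::vector<Phi*> phi_inputs;
  bool live = false;
};

void MarkLivePhis(const std::vector<Phi*>& phis) {
  std::vector<Phi*> worklist;
  for (Phi* phi : phis) {
    if (phi->has_non_phi_use) {
      phi->live = true;  // directly needed by a non-phi user
      worklist.push_back(phi);
    }
  }
  while (!worklist.empty()) {
    Phi* phi = worklist.back();
    worklist.pop_back();
    for (Phi* input : phi->phi_inputs) {
      if (!input->live) {
        input->live = true;  // feeds a live phi, so it must stay live as well
        worklist.push_back(input);
      }
    }
  }
}

int main() {
  Phi a{"a"};
  Phi b{"b"};
  Phi c{"c"};
  b.has_non_phi_use = true;  // only `b` has a real (non-phi) user
  b.phi_inputs = {&a};       // ...but it consumes `a`, so `a` is kept alive too
  MarkLivePhis({&a, &b, &c});
  std::cout << a.live << b.live << c.live << "\n";  // 110: `c` stays dead
  return 0;
}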