diff options
Diffstat (limited to 'compiler/optimizing')
69 files changed, 9762 insertions, 3858 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index cca0baf274..4c3f66aa4f 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -20,6 +20,7 @@ #include "base/arena_containers.h" #include "induction_var_range.h" +#include "side_effects_analysis.h" #include "nodes.h" namespace art { @@ -175,6 +176,24 @@ class ValueBound : public ValueObject { return false; } + // Returns if it's certain this->bound > `bound`. + bool GreaterThan(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ > bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound < `bound`. + bool LessThan(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ < bound.constant_; + } + // Not comparable. Just return false. + return false; + } + // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { @@ -252,157 +271,6 @@ class ValueBound : public ValueObject { int32_t constant_; }; -// Collect array access data for a loop. -// TODO: make it work for multiple arrays inside the loop. 
-class ArrayAccessInsideLoopFinder : public ValueObject { - public: - explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) - : induction_variable_(induction_variable), - found_array_length_(nullptr), - offset_low_(std::numeric_limits<int32_t>::max()), - offset_high_(std::numeric_limits<int32_t>::min()) { - Run(); - } - - HArrayLength* GetFoundArrayLength() const { return found_array_length_; } - bool HasFoundArrayLength() const { return found_array_length_ != nullptr; } - int32_t GetOffsetLow() const { return offset_low_; } - int32_t GetOffsetHigh() const { return offset_high_; } - - // Returns if `block` that is in loop_info may exit the loop, unless it's - // the loop header for loop_info. - static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) { - DCHECK(loop_info->Contains(*block)); - if (block == loop_info->GetHeader()) { - // Loop header of loop_info. Exiting loop is normal. - return false; - } - for (HBasicBlock* successor : block->GetSuccessors()) { - if (!loop_info->Contains(*successor)) { - // One of the successors exits the loop. - return true; - } - } - return false; - } - - static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { - for (HBasicBlock* back_edge : loop_info->GetBackEdges()) { - if (!block->Dominates(back_edge)) { - return false; - } - } - return true; - } - - void Run() { - HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); - HBasicBlock* block = it_loop.Current(); - DCHECK(block == induction_variable_->GetBlock()); - // Skip loop header. Since narrowed value range of a MonotonicValueRange only - // applies to the loop body (after the test at the end of the loop header). 
- it_loop.Advance(); - for (; !it_loop.Done(); it_loop.Advance()) { - block = it_loop.Current(); - DCHECK(block->IsInLoop()); - if (!DominatesAllBackEdges(block, loop_info)) { - // In order not to trigger deoptimization unnecessarily, make sure - // that all array accesses collected are really executed in the loop. - // For array accesses in a branch inside the loop, don't collect the - // access. The bounds check in that branch might not be eliminated. - continue; - } - if (EarlyExit(block, loop_info)) { - // If the loop body can exit loop (like break, return, etc.), it's not guaranteed - // that the loop will loop through the full monotonic value range from - // initial_ to end_. So adding deoptimization might be too aggressive and can - // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. - found_array_length_ = nullptr; - return; - } - for (HInstruction* instruction = block->GetFirstInstruction(); - instruction != nullptr; - instruction = instruction->GetNext()) { - if (!instruction->IsBoundsCheck()) { - continue; - } - - HInstruction* length_value = instruction->InputAt(1); - if (length_value->IsIntConstant()) { - // TODO: may optimize for constant case. - continue; - } - - if (length_value->IsPhi()) { - // When adding deoptimizations in outer loops, we might create - // a phi for the array length, and update all uses of the - // length in the loop to that phi. Therefore, inner loops having - // bounds checks on the same array will use that phi. - // TODO: handle these cases. - continue; - } - - DCHECK(length_value->IsArrayLength()); - HArrayLength* array_length = length_value->AsArrayLength(); - - HInstruction* array = array_length->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (loop_info->Contains(*array->GetBlock())) { - // Array is defined inside the loop. Skip. 
- continue; - } - - if (found_array_length_ != nullptr && found_array_length_ != array_length) { - // There is already access for another array recorded for the loop. - // TODO: handle multiple arrays. - continue; - } - - HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); - HInstruction* left = index; - int32_t right = 0; - if (left == induction_variable_ || - (ValueBound::IsAddOrSubAConstant(index, &left, &right) && - left == induction_variable_)) { - // For patterns like array[i] or array[i + 2]. - if (right < offset_low_) { - offset_low_ = right; - } - if (right > offset_high_) { - offset_high_ = right; - } - } else { - // Access not in induction_variable/(induction_variable_ + constant) - // format. Skip. - continue; - } - // Record this array. - found_array_length_ = array_length; - } - } - } - - private: - // The instruction that corresponds to a MonotonicValueRange. - HInstruction* induction_variable_; - - // The array length of the array that's accessed inside the loop body. - HArrayLength* found_array_length_; - - // The lowest and highest constant offsets relative to induction variable - // instruction_ in all array accesses. - // If array access are: array[i-1], array[i], array[i+1], - // offset_low_ is -1 and offset_high is 1. - int32_t offset_low_; - int32_t offset_high_; - - DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder); -}; - /** * Represent a range of lower bound and upper bound, both being inclusive. 
* Currently a ValueRange may be generated as a result of the following: @@ -500,18 +368,13 @@ class MonotonicValueRange : public ValueRange { : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), induction_variable_(induction_variable), initial_(initial), - end_(nullptr), - inclusive_(false), increment_(increment), bound_(bound) {} virtual ~MonotonicValueRange() {} - HInstruction* GetInductionVariable() const { return induction_variable_; } int32_t GetIncrement() const { return increment_; } ValueBound GetBound() const { return bound_; } - void SetEnd(HInstruction* end) { end_ = end; } - void SetInclusive(bool inclusive) { inclusive_ = inclusive; } HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); @@ -519,23 +382,6 @@ class MonotonicValueRange : public ValueRange { MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } - HBasicBlock* GetLoopHeaderSuccesorInLoop() { - HBasicBlock* header = GetLoopHeader(); - HInstruction* instruction = header->GetLastInstruction(); - DCHECK(instruction->IsIf()); - HIf* h_if = instruction->AsIf(); - HLoopInformation* loop_info = header->GetLoopInformation(); - bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); - bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); - - // Just in case it's some strange loop structure. - if (true_successor_in_loop && false_successor_in_loop) { - return nullptr; - } - DCHECK(true_successor_in_loop || false_successor_in_loop); - return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); - } - // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -627,467 +473,9 @@ class MonotonicValueRange : public ValueRange { } } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. 
- // For example, this loop: - // - // for (int i = start; i < end; i++) { - // array[i - 1] = array[i] + array[i + 1]; - // } - // - // will be transformed to: - // - // int array_length_in_loop_body_if_needed; - // if (start >= end) { - // array_length_in_loop_body_if_needed = 0; - // } else { - // if (start < 1) deoptimize(); - // if (array == null) deoptimize(); - // array_length = array.length; - // if (end > array_length - 1) deoptimize; - // array_length_in_loop_body_if_needed = array_length; - // } - // for (int i = start; i < end; i++) { - // // No more null check and bounds check. - // // array.length value is replaced with array_length_in_loop_body_if_needed - // // in the loop body. - // array[i - 1] = array[i] + array[i + 1]; - // } - // - // We basically first go through the loop body and find those array accesses whose - // index is at a constant offset from the induction variable ('i' in the above example), - // and update offset_low and offset_high along the way. We then add the following - // deoptimizations in the loop pre-header (suppose end is not inclusive). - // if (start < -offset_low) deoptimize(); - // if (end >= array.length - offset_high) deoptimize(); - // It might be necessary to first hoist array.length (and the null check on it) out of - // the loop with another deoptimization. - // - // In order not to trigger deoptimization unnecessarily, we want to make a strong - // guarantee that no deoptimization is triggered if the loop body itself doesn't - // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop - // body must throw AIOOBE). - // This is achieved by the following: - // 1) We only process loops that iterate through the full monotonic range from - // initial_ to end_. We do the following checks to make sure that's the case: - // a) The loop doesn't have early exit (via break, return, etc.) - // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. 
- // 2) We only collect array accesses of blocks in the loop body that dominate - // all loop back edges, these array accesses are guaranteed to happen - // at each loop iteration. - // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses - // when the induction variable is at initial_ and end_ must be in a legal range. - // Since the added deoptimizations are basically checking the induction variable - // at initial_ and end_ values, no deoptimization will be triggered either. - // - // A special case is the loop body isn't entered at all. In that case, we may still - // add deoptimization due to the analysis described above. In order not to trigger - // deoptimization, we do a test between initial_ and end_ first and skip over - // the added deoptimization. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // In order not to trigger deoptimization unnecessarily, we want to - // make sure the loop iterates through the full range from initial_ to - // end_ so that boundaries are covered by the loop. An increment of 2, - // for example, may skip end_. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - if (!initial_->GetBlock()->Dominates(pre_header) || - !end_->GetBlock()->Dominates(pre_header)) { - // Can't add a check in loop pre-header if the value isn't available there. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. 
- return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - - // Returns true if adding a (constant >= value) check for deoptimization - // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { - *is_proven = false; - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - DCHECK(value->GetBlock()->Dominates(pre_header)); - - // See if we can prove the relationship first. - if (value->IsIntConstant()) { - if (value->AsIntConstant()->GetValue() >= constant) { - // Already true. - *is_proven = true; - return true; - } else { - // May throw exception. Don't add deoptimization. - // Keep bounds checks in the loops. - return false; - } - } - // Can benefit from deoptimization. - return true; - } - - // Try to filter out cases that the loop entry test will never be true. - bool LoopEntryTestUseful() { - if (initial_->IsIntConstant() && end_->IsIntConstant()) { - int32_t initial_val = initial_->AsIntConstant()->GetValue(); - int32_t end_val = end_->AsIntConstant()->GetValue(); - if (increment_ == 1) { - if (inclusive_) { - return initial_val > end_val; - } else { - return initial_val >= end_val; - } - } else { - DCHECK_EQ(increment_, -1); - if (inclusive_) { - return initial_val < end_val; - } else { - return initial_val <= end_val; - } - } - } - return true; - } - - // Returns the block for adding deoptimization. 
- HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - // Deoptimization is only added when both initial_ and end_ are defined - // before the loop. - DCHECK(initial_->GetBlock()->Dominates(pre_header)); - DCHECK(end_->GetBlock()->Dominates(pre_header)); - - // If it can be proven the loop body is definitely entered (unless exception - // is thrown in the loop header for which triggering deoptimization is fine), - // there is no need for tranforming the loop. In that case, deoptimization - // will just be added in the loop pre-header. - if (!LoopEntryTestUseful()) { - return pre_header; - } - - HGraph* graph = header->GetGraph(); - graph->TransformLoopHeaderForBCE(header); - HBasicBlock* new_pre_header = header->GetDominator(); - DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); - HBasicBlock* if_block = new_pre_header->GetDominator(); - HBasicBlock* dummy_block = if_block->GetSuccessors()[0]; // True successor. - HBasicBlock* deopt_block = if_block->GetSuccessors()[1]; // False successor. - - dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); - deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); - new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); - return deopt_block; - } - - // Adds a test between initial_ and end_ to see if the loop body is entered. - // If the loop body isn't entered at all, it jumps to the loop pre-header (after - // transformation) to avoid any deoptimization. 
- void AddLoopBodyEntryTest() { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - HBasicBlock* if_block = pre_header->GetDominator(); - HGraph* graph = header->GetGraph(); - - HCondition* cond; - if (increment_ == 1) { - if (inclusive_) { - cond = new (graph->GetArena()) HGreaterThan(initial_, end_); - } else { - cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); - } - } else { - DCHECK_EQ(increment_, -1); - if (inclusive_) { - cond = new (graph->GetArena()) HLessThan(initial_, end_); - } else { - cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); - } - } - HIf* h_if = new (graph->GetArena()) HIf(cond); - if_block->AddInstruction(cond); - if_block->AddInstruction(h_if); - } - - // Adds a check that (value >= constant), and HDeoptimize otherwise. - void AddDeoptimizationConstant(HInstruction* value, - int32_t constant, - HBasicBlock* deopt_block, - bool loop_entry_test_block_added) { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); - if (loop_entry_test_block_added) { - DCHECK(deopt_block->GetSuccessors()[0] == pre_header); - } else { - DCHECK(deopt_block == pre_header); - } - HGraph* graph = header->GetGraph(); - HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); - if (loop_entry_test_block_added) { - DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]); - } - - HIntConstant* const_instr = graph->GetIntConstant(constant); - HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); - 
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), header); - } - - // Returns true if adding a (value <= array_length + offset) check for deoptimization - // is allowed and will benefit compiled code. - bool CanAddDeoptimizationArrayLength(HInstruction* value, - HArrayLength* array_length, - int32_t offset, - bool* is_proven) { - *is_proven = false; - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); - DCHECK(value->GetBlock()->Dominates(pre_header)); - - if (array_length->GetBlock() == header) { - // array_length_in_loop_body_if_needed only has correct value when the loop - // body is entered. We bail out in this case. Usually array_length defined - // in the loop header is already hoisted by licm. - return false; - } else { - // array_length is defined either before the loop header already, or in - // the loop body since it's used in the loop body. If it's defined in the loop body, - // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, - // all the uses of array_length must be dominated by its definition in the loop - // body. array_length_in_loop_body_if_needed is guaranteed to be the same as - // array_length once the loop body is entered so all the uses of the phi will - // use the correct value. - } - - if (offset > 0) { - // There might be overflow issue. - // TODO: handle this, possibly with some distance relationship between - // offset_low and offset_high, or using another deoptimization to make - // sure (array_length + offset) doesn't overflow. - return false; - } - - // See if we can prove the relationship first. - if (value == array_length) { - if (offset >= 0) { - // Already true. - *is_proven = true; - return true; - } else { - // May throw exception. Don't add deoptimization. - // Keep bounds checks in the loops. 
- return false; - } - } - // Can benefit from deoptimization. - return true; - } - - // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. - void AddDeoptimizationArrayLength(HInstruction* value, - HArrayLength* array_length, - int32_t offset, - HBasicBlock* deopt_block, - bool loop_entry_test_block_added) { - HBasicBlock* header = induction_variable_->GetBlock(); - DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); - if (loop_entry_test_block_added) { - DCHECK(deopt_block->GetSuccessors()[0] == pre_header); - } else { - DCHECK(deopt_block == pre_header); - } - HGraph* graph = header->GetGraph(); - HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); - - // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(deopt_block)) { - // array_length must be defined in the loop body. - DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); - DCHECK(array_length->GetBlock() != header); - - HInstruction* array = array_length->InputAt(0); - HNullCheck* null_check = array->AsNullCheck(); - if (null_check != nullptr) { - array = null_check->InputAt(0); - } - // We've already made sure the array is defined before the loop when collecting - // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(deopt_block)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { - // Hoist null check out of loop with a deoptimization. - HNullConstant* null_constant = graph->GetNullConstant(); - HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); - // TODO: for one dex_pc, share the same deoptimization slow path. 
- HDeoptimize* null_check_deoptimize = new (graph->GetArena()) - HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore( - null_check_cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore( - null_check_deoptimize, deopt_block->GetLastInstruction()); - // Eliminate null check in the loop. - null_check->ReplaceWith(array); - null_check->GetBlock()->RemoveInstruction(null_check); - null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), header); - } - - HArrayLength* new_array_length - = new (graph->GetArena()) HArrayLength(array, array->GetDexPc()); - deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); - - if (loop_entry_test_block_added) { - // Replace array_length defined inside the loop body with a phi - // array_length_in_loop_body_if_needed. This is a synthetic phi so there is - // no vreg number for it. - HPhi* phi = new (graph->GetArena()) HPhi( - graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); - // Set to 0 if the loop body isn't entered. - phi->SetRawInputAt(0, graph->GetIntConstant(0)); - // Set to array.length if the loop body is entered. - phi->SetRawInputAt(1, new_array_length); - pre_header->AddPhi(phi); - array_length->ReplaceWith(phi); - // Make sure phi is only used after the loop body is entered. - if (kIsDebugBuild) { - for (HUseIterator<HInstruction*> it(phi->GetUses()); - !it.Done(); - it.Advance()) { - HInstruction* user = it.Current()->GetUser(); - DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); - } - } - } else { - array_length->ReplaceWith(new_array_length); - } - - array_length->GetBlock()->RemoveInstruction(array_length); - // Use new_array_length for deopt. 
- array_length = new_array_length; - } - - HInstruction* added = array_length; - if (offset != 0) { - HIntConstant* offset_instr = graph->GetIntConstant(offset); - added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); - } - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); - HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); - deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); - deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); - } - - // Adds deoptimizations in loop pre-header with the collected array access - // data so that value ranges can be established in loop body. - // Returns true if deoptimizations are successfully added, or if it's proven - // it's not necessary. - bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) { - int32_t offset_low = finder.GetOffsetLow(); - int32_t offset_high = finder.GetOffsetHigh(); - HArrayLength* array_length = finder.GetFoundArrayLength(); - - HBasicBlock* pre_header = - induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader(); - if (!initial_->GetBlock()->Dominates(pre_header) || - !end_->GetBlock()->Dominates(pre_header)) { - // Can't move initial_ or end_ into pre_header for comparisons. - return false; - } - - HBasicBlock* deopt_block; - bool loop_entry_test_block_added = false; - bool is_constant_proven, is_length_proven; - - HInstruction* const_comparing_instruction; - int32_t const_compared_to; - HInstruction* array_length_comparing_instruction; - int32_t array_length_offset; - if (increment_ == 1) { - // Increasing from initial_ to end_. 
- const_comparing_instruction = initial_; - const_compared_to = -offset_low; - array_length_comparing_instruction = end_; - array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; - } else { - const_comparing_instruction = end_; - const_compared_to = inclusive_ ? -offset_low : -offset_low - 1; - array_length_comparing_instruction = initial_; - array_length_offset = -offset_high - 1; - } - - if (CanAddDeoptimizationConstant(const_comparing_instruction, - const_compared_to, - &is_constant_proven) && - CanAddDeoptimizationArrayLength(array_length_comparing_instruction, - array_length, - array_length_offset, - &is_length_proven)) { - if (!is_constant_proven || !is_length_proven) { - deopt_block = TransformLoopForDeoptimizationIfNeeded(); - loop_entry_test_block_added = (deopt_block != pre_header); - if (loop_entry_test_block_added) { - // Loop body may be entered. - AddLoopBodyEntryTest(); - } - } - if (!is_constant_proven) { - AddDeoptimizationConstant(const_comparing_instruction, - const_compared_to, - deopt_block, - loop_entry_test_block_added); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(array_length_comparing_instruction, - array_length, - array_length_offset, - deopt_block, - loop_entry_test_block_added); - } - return true; - } - return false; - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. - HInstruction* end_; // End value. - bool inclusive_; // Whether end value is inclusive. const int32_t increment_; // Increment for each loop iteration. const ValueBound bound_; // Additional value bound info for initial_. 
@@ -1111,7 +499,9 @@ class BCEVisitor : public HGraphVisitor { return block->GetBlockId() >= initial_block_size_; } - BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis) + BCEVisitor(HGraph* graph, + const SideEffectsAnalysis& side_effects, + HInductionVarAnalysis* induction_analysis) : HGraphVisitor(graph), maps_(graph->GetBlocks().size(), ArenaSafeMap<int, ValueRange*>( @@ -1121,8 +511,17 @@ class BCEVisitor : public HGraphVisitor { first_constant_index_bounds_check_map_( std::less<int>(), graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + early_exit_loop_( + std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + taken_test_loop_( + std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), need_to_revisit_block_(false), + has_deoptimization_on_constant_subscripts_(false), initial_block_size_(graph->GetBlocks().size()), + side_effects_(side_effects), induction_range_(induction_analysis) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -1138,6 +537,17 @@ class BCEVisitor : public HGraphVisitor { } } + void Finish() { + // Preserve SSA structure which may have been broken by adding one or more + // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()). + InsertPhiNodes(); + + // Clear the loop data structures. + early_exit_loop_.clear(); + taken_test_loop_.clear(); + finite_loop_.clear(); + } + private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { @@ -1166,25 +576,6 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Return the range resulting from induction variable analysis of "instruction" when the value - // is used from "context", for example, an index used from a bounds-check inside a loop body. 
- ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) { - InductionVarRange::Value v1; - InductionVarRange::Value v2; - bool needs_finite_test = false; - induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test); - if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && - v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { - DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); - DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); - ValueBound low = ValueBound(v1.instruction, v1.b_constant); - ValueBound up = ValueBound(v2.instruction, v2.b_constant); - return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up); - } - // Didn't find anything useful. - return nullptr; - } - // Narrow the value range of `instruction` at the end of `basic_block` with `range`, // and push the narrowed value range to `successor`. void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, @@ -1330,17 +721,6 @@ class BCEVisitor : public HGraphVisitor { bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { - if (left_monotonic_range != nullptr) { - // Update the info for monotonic value range. - if (left_monotonic_range->GetInductionVariable() == left && - left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHeader() && - instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { - left_monotonic_range->SetEnd(right); - left_monotonic_range->SetInclusive(cond == kCondLT); - } - } - if (!upper.Equals(ValueBound::Max())) { int32_t compensation = (cond == kCondLT) ? 
-1 : 0; // upper bound is inclusive ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); @@ -1364,17 +744,6 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondGT || cond == kCondGE) { - if (left_monotonic_range != nullptr) { - // Update the info for monotonic value range. - if (left_monotonic_range->GetInductionVariable() == left && - left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHeader() && - instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { - left_monotonic_range->SetEnd(right); - left_monotonic_range->SetInclusive(cond == kCondGT); - } - } - // array.length as a lower bound isn't considered useful. if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive @@ -1400,38 +769,34 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) { + void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength() || array_length->IsPhi()); - - if (array_length->IsPhi()) { - // Input 1 of the phi contains the real array.length once the loop body is - // entered. That value will be used for bound analysis. The graph is still - // strictly in SSA form. - array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); - } + bool try_dynamic_bce = true; if (!index->IsIntConstant()) { + // Non-constant subscript. ValueBound lower = ValueBound(nullptr, 0); // constant 0 ValueBound upper = ValueBound(array_length, -1); // array_length - 1 ValueRange array_range(GetGraph()->GetArena(), lower, upper); - // Try range obtained by local analysis. 
+ // Try range obtained by dominator-based analysis. ValueRange* index_range = LookupValueRange(index, block); if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); return; } // Try range obtained by induction variable analysis. - index_range = LookupInductionRange(bounds_check, index); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceBoundsCheck(bounds_check, index); + // Disables dynamic bce if OOB is certain. + if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) { + ReplaceInstruction(bounds_check, index); return; } } else { + // Constant subscript. int32_t constant = index->AsIntConstant()->GetValue(); if (constant < 0) { // Will always throw exception. @@ -1439,7 +804,7 @@ class BCEVisitor : public HGraphVisitor { } if (array_length->IsIntConstant()) { if (constant < array_length->AsIntConstant()->GetValue()) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); } return; } @@ -1450,7 +815,7 @@ class BCEVisitor : public HGraphVisitor { ValueBound lower = existing_range->GetLower(); DCHECK(lower.IsConstant()); if (constant < lower.GetConstant()) { - ReplaceBoundsCheck(bounds_check, index); + ReplaceInstruction(bounds_check, index); return; } else { // Existing range isn't strong enough to eliminate the bounds check. @@ -1485,11 +850,11 @@ class BCEVisitor : public HGraphVisitor { ValueRange(GetGraph()->GetArena(), lower, upper); GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } - } - void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) { - bounds_check->ReplaceWith(index); - bounds_check->GetBlock()->RemoveInstruction(bounds_check); + // If static analysis fails, and OOB is not certain, try dynamic elimination. 
+ if (try_dynamic_bce) { + TryDynamicBCE(bounds_check); + } } static bool HasSameInputAtBackEdges(HPhi* phi) { @@ -1508,7 +873,7 @@ class BCEVisitor : public HGraphVisitor { return true; } - void VisitPhi(HPhi* phi) { + void VisitPhi(HPhi* phi) OVERRIDE { if (phi->IsLoopHeaderPhi() && (phi->GetType() == Primitive::kPrimInt) && HasSameInputAtBackEdges(phi)) { @@ -1555,7 +920,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitIf(HIf* instruction) { + void VisitIf(HIf* instruction) OVERRIDE { if (instruction->InputAt(0)->IsCondition()) { HCondition* cond = instruction->InputAt(0)->AsCondition(); IfCondition cmp = cond->GetCondition(); @@ -1564,42 +929,11 @@ class BCEVisitor : public HGraphVisitor { HInstruction* left = cond->GetLeft(); HInstruction* right = cond->GetRight(); HandleIf(instruction, left, right, cmp); - - HBasicBlock* block = instruction->GetBlock(); - ValueRange* left_range = LookupValueRange(left, block); - if (left_range == nullptr) { - return; - } - - if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { - // The comparison is for an induction variable in the loop header. - DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor = - left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); - if (loop_body_successor == nullptr) { - // In case it's some strange loop structure. - return; - } - ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if ((new_left_range == left_range) || - // Range narrowed with deoptimization is usually more useful than - // a constant range. - new_left_range->IsConstantValueRange()) { - // We are not successful in narrowing the monotonic value range to - // a regular value range. Try using deoptimization. 
- new_left_range = left_range->AsMonotonicValueRange()-> - NarrowWithDeoptimization(); - if (new_left_range != left_range) { - GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); - } - } - } } } } - void VisitAdd(HAdd* add) { + void VisitAdd(HAdd* add) OVERRIDE { HInstruction* right = add->GetRight(); if (right->IsIntConstant()) { ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock()); @@ -1613,7 +947,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitSub(HSub* sub) { + void VisitSub(HSub* sub) OVERRIDE { HInstruction* left = sub->GetLeft(); HInstruction* right = sub->GetRight(); if (right->IsIntConstant()) { @@ -1715,19 +1049,19 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDiv(HDiv* div) { + void VisitDiv(HDiv* div) OVERRIDE { FindAndHandlePartialArrayLength(div); } - void VisitShr(HShr* shr) { + void VisitShr(HShr* shr) OVERRIDE { FindAndHandlePartialArrayLength(shr); } - void VisitUShr(HUShr* ushr) { + void VisitUShr(HUShr* ushr) OVERRIDE { FindAndHandlePartialArrayLength(ushr); } - void VisitAnd(HAnd* instruction) { + void VisitAnd(HAnd* instruction) OVERRIDE { if (instruction->GetRight()->IsIntConstant()) { int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue(); if (constant > 0) { @@ -1742,7 +1076,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitNewArray(HNewArray* new_array) { + void VisitNewArray(HNewArray* new_array) OVERRIDE { HInstruction* len = new_array->InputAt(0); if (!len->IsIntConstant()) { HInstruction *left; @@ -1766,9 +1100,12 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDeoptimize(HDeoptimize* deoptimize) { - // Right now it's only HLessThanOrEqual. 
- DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) { + return; + } + // If this instruction was added by AddCompareWithDeoptimization(), narrow + // the range accordingly in subsequent basic blocks. HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); HInstruction* instruction = less_than_or_equal->InputAt(0); if (instruction->IsArrayLength()) { @@ -1782,6 +1119,35 @@ class BCEVisitor : public HGraphVisitor { } } + /** + * After null/bounds checks are eliminated, some invariant array references + * may be exposed underneath which can be hoisted out of the loop to the + * preheader or, in combination with dynamic bce, the deoptimization block. + * + * for (int i = 0; i < n; i++) { + * <-------+ + * for (int j = 0; j < n; j++) | + * a[i][j] = 0; --a[i]--+ + * } + * + * Note: this optimization is no longer applied after deoptimization on array references + * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in + * those cases it would be unsafe to hoist array references across their deoptimization + * instruction inside a loop. 
+ */ + void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) { + HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation(); + if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) && + loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { + SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); + if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { + HoistToPreheaderOrDeoptBlock(loop, array_get); + } + } + } + } + void AddCompareWithDeoptimization(HInstruction* array_length, HIntConstant* const_instr, HBasicBlock* block) { @@ -1803,6 +1169,9 @@ class BCEVisitor : public HGraphVisitor { block->InsertInstructionBefore(cond, bounds_check); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + // Flag that this kind of deoptimization on array references with constant + // subscripts has occurred to prevent further hoisting of these references. + has_deoptimization_on_constant_subscripts_ = true; } void AddComparesWithDeoptimization(HBasicBlock* block) { @@ -1846,21 +1215,432 @@ class BCEVisitor : public HGraphVisitor { } } + /** + * Returns true if static range analysis based on induction variables can determine the bounds + * check on the given array range is always satisfied with the computed index range. The output + * parameter try_dynamic_bce is set to false if OOB is certain. 
+ */ + bool InductionRangeFitsIn(ValueRange* array_range, + HInstruction* context, + HInstruction* index, + bool* try_dynamic_bce) { + InductionVarRange::Value v1; + InductionVarRange::Value v2; + bool needs_finite_test = false; + induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test); + do { + if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && + v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { + DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); + DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); + ValueRange index_range(GetGraph()->GetArena(), + ValueBound(v1.instruction, v1.b_constant), + ValueBound(v2.instruction, v2.b_constant)); + // If analysis reveals a certain OOB, disable dynamic BCE. + if (index_range.GetLower().LessThan(array_range->GetLower()) || + index_range.GetUpper().GreaterThan(array_range->GetUpper())) { + *try_dynamic_bce = false; + return false; + } + // Use analysis for static bce only if loop is finite. + if (!needs_finite_test && index_range.FitsIn(array_range)) { + return true; + } + } + } while (induction_range_.RefineOuter(&v1, &v2)); + return false; + } + + /** + * When the compiler fails to remove a bounds check statically, we try to remove the bounds + * check dynamically by adding runtime tests that trigger a deoptimization in case bounds + * will go out of range (we want to be rather certain of that given the slowdown of + * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding + * bounds checks and related null checks removed. + */ + void TryDynamicBCE(HBoundsCheck* instruction) { + HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation(); + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + // If dynamic bounds check elimination seems profitable and is possible, then proceed. 
+ bool needs_finite_test = false; + bool needs_taken_test = false; + if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) && + induction_range_.CanGenerateCode( + instruction, index, &needs_finite_test, &needs_taken_test) && + CanHandleInfiniteLoop(loop, index, needs_finite_test) && + CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen) + HInstruction* lower = nullptr; + HInstruction* upper = nullptr; + // Generate the following unsigned comparisons + // if (lower > upper) deoptimize; + // if (upper >= length) deoptimize; + // or, for a non-induction index, just the unsigned comparison on its 'upper' value + // if (upper >= length) deoptimize; + // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit + // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests + // correctly guard against any possible OOB (including arithmetic wrap-around cases). + HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); + if (lower != nullptr) { + InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); + } + InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length)); + ReplaceInstruction(instruction, index); + } + } + + /** + * Returns true if heuristics indicate that dynamic bce may be profitable. + */ + bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) { + if (loop != nullptr) { + // A try boundary preheader is hard to handle. + // TODO: remove this restriction + if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) { + return false; + } + // Does loop have early-exits? If so, the full range may not be covered by the loop + // at runtime and testing the range may apply deoptimization unnecessarily. 
+ if (IsEarlyExitLoop(loop)) { + return false; + } + // Does the current basic block dominate all back edges? If not, + // don't apply dynamic bce to something that may not be executed. + for (HBasicBlock* back_edge : loop->GetBackEdges()) { + if (!block->Dominates(back_edge)) { + return false; + } + } + // Success! + return true; + } + return false; + } + + /** + * Returns true if the loop has early exits, which implies it may not cover + * the full range computed by range analysis based on induction variables. + */ + bool IsEarlyExitLoop(HLoopInformation* loop) { + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + // If loop has been analyzed earlier for early-exit, don't repeat the analysis. + auto it = early_exit_loop_.find(loop_id); + if (it != early_exit_loop_.end()) { + return it->second; + } + // First time early-exit analysis for this loop. Since analysis requires scanning + // the full loop-body, results of the analysis is stored for subsequent queries. + HBlocksInLoopReversePostOrderIterator it_loop(*loop); + for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) { + for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) { + if (!loop->Contains(*successor)) { + early_exit_loop_.Put(loop_id, true); + return true; + } + } + } + early_exit_loop_.Put(loop_id, false); + return false; + } + + /** + * Returns true if the array length is already loop invariant, or can be made so + * by handling the null check under the hood of the array length operation. 
+ */ + bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) { + if (loop->IsDefinedOutOfTheLoop(length)) { + return true; + } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { + if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { + HoistToPreheaderOrDeoptBlock(loop, length); + return true; + } + } + return false; + } + + /** + * Returns true if the null check is already loop invariant, or can be made so + * by generating a deoptimization test. + */ + bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) { + if (loop->IsDefinedOutOfTheLoop(check)) { + return true; + } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) { + HInstruction* array = check->InputAt(0); + if (loop->IsDefinedOutOfTheLoop(array)) { + // Generate: if (array == null) deoptimize; + HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HInstruction* cond = + new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); + InsertDeopt(loop, block, cond); + ReplaceInstruction(check, array); + return true; + } + } + return false; + } + + /** + * Returns true if compiler can apply dynamic bce to loops that may be infinite + * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate + * the range analysis evaluation code by "overshooting" the computed range. + * Since deoptimization would be a bad choice, and there is no other version + * of the loop to use, dynamic bce in such cases is only allowed if other tests + * ensure the loop is finite. + */ + bool CanHandleInfiniteLoop( + HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) { + if (needs_infinite_test) { + // If we already forced the loop to be finite, allow directly. 
+ const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + if (finite_loop_.find(loop_id) != finite_loop_.end()) { + return true; + } + // Otherwise, allow dynamic bce if the index (which is necessarily an induction at + // this point) is the direct loop index (viz. a[i]), since then the runtime tests + // ensure upper bound cannot cause an infinite loop. + HInstruction* control = loop->GetHeader()->GetLastInstruction(); + if (control->IsIf()) { + HInstruction* if_expr = control->AsIf()->InputAt(0); + if (if_expr->IsCondition()) { + HCondition* condition = if_expr->AsCondition(); + if (index == condition->InputAt(0) || + index == condition->InputAt(1)) { + finite_loop_.insert(loop_id); + return true; + } + } + } + return false; + } + return true; + } + + /** Inserts a deoptimization test. */ + void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { + HInstruction* suspend = loop->GetSuspendCheck(); + block->InsertInstructionBefore(condition, block->GetLastInstruction()); + HDeoptimize* deoptimize = + new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc()); + block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); + if (suspend->HasEnvironment()) { + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), loop->GetHeader()); + } + } + + /** Hoists instruction out of the loop to preheader or deoptimization block. */ + void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + // Use preheader unless there is an earlier generated deoptimization block since + // hoisted expressions may depend on and/or used by the deoptimization tests. + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + HBasicBlock* preheader = loop->GetPreHeader(); + HBasicBlock* block = preheader; + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + block = it->second; + } + // Hoist the instruction. 
+ DCHECK(!instruction->HasEnvironment()); + instruction->MoveBefore(block->GetLastInstruction()); + } + + /** + * Adds a new taken-test structure to a loop if needed (and not already done). + * The taken-test protects range analysis evaluation code to avoid any + * deoptimization caused by incorrect trip-count evaluation in non-taken loops. + * + * Returns block in which deoptimizations/invariants can be put. + * + * old_preheader + * | + * if_block <- taken-test protects deoptimization block + * / \ + * true_block false_block <- deoptimizations/invariants are placed in true_block + * \ / + * new_preheader <- may require phi nodes to preserve SSA structure + * | + * header + * + * For example, this loop: + * + * for (int i = lower; i < upper; i++) { + * array[i] = 0; + * } + * + * will be transformed to: + * + * if (lower < upper) { + * if (array == null) deoptimize; + * array_length = array.length; + * if (lower > upper) deoptimize; // unsigned + * if (upper >= array_length) deoptimize; // unsigned + * } else { + * array_length = 0; + * } + * for (int i = lower; i < upper; i++) { + * // Loop without null check and bounds check, and any array.length replaced with array_length. + * array[i] = 0; + * } + */ + HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader), or already done (can reuse)? + const uint32_t loop_id = loop->GetHeader()->GetBlockId(); + if (!needs_taken_test) { + return loop->GetPreHeader(); + } else { + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + return it->second; + } + } + + // Generate top test structure. + HBasicBlock* header = loop->GetHeader(); + GetGraph()->TransformLoopHeaderForBCE(header); + HBasicBlock* new_preheader = loop->GetPreHeader(); + HBasicBlock* if_block = new_preheader->GetDominator(); + HBasicBlock* true_block = if_block->GetSuccessors()[0]; // True successor. 
+ HBasicBlock* false_block = if_block->GetSuccessors()[1]; // False successor. + + // Goto instructions. + true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + + // Insert the taken-test to see if the loop body is entered. If the + // loop isn't entered at all, it jumps around the deoptimization block. + if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder + HInstruction* condition = nullptr; + induction_range_.GenerateTakenTest(header->GetLastInstruction(), + GetGraph(), + if_block, + &condition); + DCHECK(condition != nullptr); + if_block->RemoveInstruction(if_block->GetLastInstruction()); + if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); + + taken_test_loop_.Put(loop_id, true_block); + return true_block; + } + + /** + * Inserts phi nodes that preserve SSA structure in generated top test structures. + * All uses of instructions in the deoptimization block that reach the loop need + * a phi node in the new loop preheader to fix the dominance relation. + * + * Example: + * if_block + * / \ + * x_0 = .. false_block + * \ / + * x_1 = phi(x_0, null) <- synthetic phi + * | + * header + */ + void InsertPhiNodes() { + // Scan all new deoptimization blocks. + for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) { + HBasicBlock* true_block = it1->second; + HBasicBlock* new_preheader = true_block->GetSingleSuccessor(); + // Scan all instructions in a new deoptimization block. + for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + Primitive::Type type = instruction->GetType(); + HPhi* phi = nullptr; + // Scan all uses of an instruction and replace each later use with a phi node. 
+ for (HUseIterator<HInstruction*> it2(instruction->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() != true_block) { + if (phi == nullptr) { + phi = NewPhi(new_preheader, instruction, type); + } + user->ReplaceInput(phi, it2.Current()->GetIndex()); + } + } + // Scan all environment uses of an instruction and replace each later use with a phi node. + for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses()); + !it2.Done(); + it2.Advance()) { + HEnvironment* user = it2.Current()->GetUser(); + if (user->GetHolder()->GetBlock() != true_block) { + if (phi == nullptr) { + phi = NewPhi(new_preheader, instruction, type); + } + user->RemoveAsUserOfInput(it2.Current()->GetIndex()); + user->SetRawEnvAt(it2.Current()->GetIndex(), phi); + phi->AddEnvUseAt(user, it2.Current()->GetIndex()); + } + } + } + } + } + + /** + * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation. + * These are synthetic phi nodes without a virtual register. + */ + HPhi* NewPhi(HBasicBlock* new_preheader, + HInstruction* instruction, + Primitive::Type type) { + HGraph* graph = GetGraph(); + HInstruction* zero; + switch (type) { + case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break; + case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break; + case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break; + default: zero = graph->GetConstant(type, 0); break; + } + HPhi* phi = new (graph->GetArena()) + HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type)); + phi->SetRawInputAt(0, instruction); + phi->SetRawInputAt(1, zero); + new_preheader->AddPhi(phi); + return phi; + } + + /** Helper method to replace an instruction with another instruction. 
*/ + static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) { + instruction->ReplaceWith(replacement); + instruction->GetBlock()->RemoveInstruction(instruction); + } + + // A set of maps, one per basic block, from instruction to range. ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_; // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in // a block that checks a constant index against that HArrayLength. ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + // Early-exit loop bookkeeping. + ArenaSafeMap<uint32_t, bool> early_exit_loop_; + + // Taken-test loop bookkeeping. + ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_; + + // Finite loop bookkeeping. + ArenaSet<uint32_t> finite_loop_; + // For the block, there is at least one HArrayLength instruction for which there // is more than one bounds check instruction with constant indexing. And it's // beneficial to add a compare instruction that has deoptimization fallback and // eliminate those bounds checks. bool need_to_revisit_block_; + // Flag that denotes whether deoptimization has occurred on array references + // with constant subscripts (see AddCompareWithDeoptimization()). + bool has_deoptimization_on_constant_subscripts_; + // Initial number of blocks. uint32_t initial_block_size_; + // Side effects. + const SideEffectsAnalysis& side_effects_; + // Range analysis based on induction variables. InductionVarRange induction_range_; @@ -1872,14 +1652,12 @@ void BoundsCheckElimination::Run() { return; } - BCEVisitor visitor(graph_, induction_analysis_); // Reverse post order guarantees a node's dominators are visited first. // We want to visit in the dominator-based order since if a value is known to // be bounded by a range at one instruction, it must be true that all uses of // that value dominated by that instruction fits in that range. Range of that // value can be narrowed further down in the dominator tree. 
- // - // TODO: only visit blocks that dominate some array accesses. + BCEVisitor visitor(graph_, side_effects_, induction_analysis_); HBasicBlock* last_visited_block = nullptr; for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); @@ -1896,6 +1674,9 @@ void BoundsCheckElimination::Run() { visitor.VisitBasicBlock(current); last_visited_block = current; } + + // Perform cleanup. + visitor.Finish(); } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index cdff3ca0ba..b9df686ffd 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -21,12 +21,16 @@ namespace art { +class SideEffectsAnalysis; class HInductionVarAnalysis; class BoundsCheckElimination : public HOptimization { public: - BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis) + BoundsCheckElimination(HGraph* graph, + const SideEffectsAnalysis& side_effects, + HInductionVarAnalysis* induction_analysis) : HOptimization(graph, kBoundsCheckEliminiationPassName), + side_effects_(side_effects), induction_analysis_(induction_analysis) {} void Run() OVERRIDE; @@ -34,6 +38,7 @@ class BoundsCheckElimination : public HOptimization { static constexpr const char* kBoundsCheckEliminiationPassName = "BCE"; private: + const SideEffectsAnalysis& side_effects_; HInductionVarAnalysis* induction_analysis_; DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index c9afdf2147..dbeb1ccc22 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -54,7 +54,7 @@ class BoundsCheckEliminationTest : public testing::Test { HInductionVarAnalysis induction(graph_); induction.Run(); - BoundsCheckElimination(graph_, &induction).Run(); + 
BoundsCheckElimination(graph_, side_effects, &induction).Run(); } ArenaPool pool_; diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 32968a597b..4dd0d26b89 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -17,6 +17,8 @@ #include "builder.h" #include "art_field-inl.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" #include "base/logging.h" #include "class_linker.h" #include "dex/verified_method.h" @@ -458,6 +460,19 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { return false; } + // Find locations where we want to generate extra stackmaps for native debugging. + // This allows us to generate the info only at interesting points (for example, + // at start of java statement) rather than before every dex instruction. + const bool native_debuggable = compiler_driver_ != nullptr && + compiler_driver_->GetCompilerOptions().GetNativeDebuggable(); + ArenaBitVector* native_debug_info_locations; + if (native_debuggable) { + const uint32_t num_instructions = code_item.insns_size_in_code_units_; + native_debug_info_locations = new (arena_) ArenaBitVector (arena_, num_instructions, false); + native_debug_info_locations->ClearAllBits(); + FindNativeDebugInfoLocations(code_item, native_debug_info_locations); + } + CreateBlocksForTryCatch(code_item); InitializeParameters(code_item.ins_size_); @@ -467,6 +482,11 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Update the current block if dex_pc starts a new block. 
MaybeUpdateCurrentBlock(dex_pc); const Instruction& instruction = *Instruction::At(code_ptr); + if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) { + if (current_block_ != nullptr) { + current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc)); + } + } if (!AnalyzeDexInstruction(instruction, dex_pc)) { return false; } @@ -507,6 +527,47 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) { current_block_ = block; } +void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, + ArenaBitVector* locations) { + // The callback gets called when the line number changes. + // In other words, it marks the start of new java statement. + struct Callback { + static bool Position(void* ctx, const DexFile::PositionInfo& entry) { + static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_); + return false; + } + }; + dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations); + // Add native debug info at the start of every basic block. + for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) { + if (FindBlockStartingAt(pc) != nullptr) { + locations->SetBit(pc); + } + } + // Instruction-specific tweaks. + const Instruction* const begin = Instruction::At(code_item.insns_); + const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_); + for (const Instruction* inst = begin; inst < end; inst = inst->Next()) { + switch (inst->Opcode()) { + case Instruction::MOVE_EXCEPTION: + case Instruction::MOVE_RESULT: + case Instruction::MOVE_RESULT_WIDE: + case Instruction::MOVE_RESULT_OBJECT: { + // The compiler checks that there are no instructions before those. + // So generate HNativeDebugInfo after them instead. 
+ locations->ClearBit(inst->GetDexPc(code_item.insns_)); + const Instruction* next = inst->Next(); + if (next < end) { + locations->SetBit(next->GetDexPc(code_item.insns_)); + } + break; + } + default: + break; + } + } +} + bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { @@ -735,6 +796,79 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) { } } +ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<2> hs(soa.Self()); + + ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); + + ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>( + *dex_compilation_unit_->GetDexFile(), + method_idx, + dex_compilation_unit_->GetDexCache(), + class_loader, + /* referrer */ nullptr, + invoke_type); + + if (UNLIKELY(resolved_method == nullptr)) { + // Clean up any exception left by type resolution. + soa.Self()->ClearException(); + return nullptr; + } + + // Check access. The class linker has a fast path for looking into the dex cache + // and does not check the access if it hits it. + if (compiling_class.Get() == nullptr) { + if (!resolved_method->IsPublic()) { + return nullptr; + } + } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(), + resolved_method, + dex_compilation_unit_->GetDexCache().Get(), + method_idx)) { + return nullptr; + } + + // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not. + // We need to look at the referrer's super class vtable. 
+ if (invoke_type == kSuper) { + if (compiling_class.Get() == nullptr) { + // Invoking a super method requires knowing the actual super class. If we did not resolve + // the compiling method's declaring class (which only happens for ahead of time compilation), + // bail out. + DCHECK(Runtime::Current()->IsAotCompiler()); + return nullptr; + } + uint16_t vtable_index = resolved_method->GetMethodIndex(); + ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry( + vtable_index, class_linker->GetImagePointerSize()); + if (actual_method != resolved_method && + !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { + // TODO: The actual method could still be referenced in the current dex file, so we + // could try locating it. + // TODO: Remove the dex_file restriction. + return nullptr; + } + if (!actual_method->IsInvokable()) { + // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub + // could resolve the callee to the wrong method. + return nullptr; + } + resolved_method = actual_method; + } + + // Check for incompatible class changes. The class linker has a fast path for + // looking into the dex cache and does not check incompatible class changes if it hits it. 
+ if (resolved_method->CheckIncompatibleClassChange(invoke_type)) { + return nullptr; + } + + return resolved_method; +} + bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, @@ -742,22 +876,18 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_range, uint32_t* args, uint32_t register_index) { - InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); - InvokeType optimized_invoke_type = original_invoke_type; + InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); const char* descriptor = dex_file_->GetMethodShorty(method_idx); Primitive::Type return_type = Primitive::GetType(descriptor[0]); // Remove the return type from the 'proto'. size_t number_of_arguments = strlen(descriptor) - 1; - if (original_invoke_type != kStatic) { // instance call + if (invoke_type != kStatic) { // instance call // One extra argument for 'this'. number_of_arguments++; } MethodReference target_method(dex_file_, method_idx); - int32_t table_index = 0; - uintptr_t direct_code = 0; - uintptr_t direct_method = 0; // Special handling for string init. int32_t string_init_offset = 0; @@ -780,7 +910,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, method_idx, target_method, dispatch_info, - original_invoke_type, + invoke_type, kStatic /* optimized_invoke_type */, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); return HandleStringInit(invoke, @@ -791,23 +921,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, descriptor); } - // Handle unresolved methods. 
- if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, - dex_pc, - true /* update_stats */, - true /* enable_devirtualization */, - &optimized_invoke_type, - &target_method, - &table_index, - &direct_code, - &direct_method)) { + ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type); + + if (resolved_method == nullptr) { MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod); HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_, number_of_arguments, return_type, dex_pc, method_idx, - original_invoke_type); + invoke_type); return HandleInvoke(invoke, number_of_vreg_arguments, args, @@ -817,21 +940,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, nullptr /* clinit_check */); } - // Handle resolved methods (non string init). - - DCHECK(optimized_invoke_type != kSuper); - // Potential class initialization check, in the case of a static method call. HClinitCheck* clinit_check = nullptr; HInvoke* invoke = nullptr; - if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) { + if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) { // By default, consider that the called method implicitly requires // an initialization check of its declaring method. HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; - if (optimized_invoke_type == kStatic) { - clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement); + ScopedObjectAccess soa(Thread::Current()); + if (invoke_type == kStatic) { + clinit_check = ProcessClinitCheckForInvoke( + dex_pc, resolved_method, method_idx, &clinit_check_requirement); + } else if (invoke_type == kSuper) { + if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { + // Update the target method to the one resolved. Note that this may be a no-op if + // we resolved to the method referenced by the instruction. 
+ method_idx = resolved_method->GetDexMethodIndex(); + target_method = MethodReference(dex_file_, method_idx); + } } HInvokeStaticOrDirect::DispatchInfo dispatch_info = { @@ -847,24 +975,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, method_idx, target_method, dispatch_info, - original_invoke_type, - optimized_invoke_type, + invoke_type, + invoke_type, clinit_check_requirement); - } else if (optimized_invoke_type == kVirtual) { + } else if (invoke_type == kVirtual) { + ScopedObjectAccess soa(Thread::Current()); // Needed for the method index invoke = new (arena_) HInvokeVirtual(arena_, number_of_arguments, return_type, dex_pc, method_idx, - table_index); + resolved_method->GetMethodIndex()); } else { - DCHECK_EQ(optimized_invoke_type, kInterface); + DCHECK_EQ(invoke_type, kInterface); + ScopedObjectAccess soa(Thread::Current()); // Needed for the method index invoke = new (arena_) HInvokeInterface(arena_, number_of_arguments, return_type, dex_pc, method_idx, - table_index); + resolved_method->GetDexMethodIndex()); } return HandleInvoke(invoke, @@ -905,14 +1035,15 @@ bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { HLoadClass* load_class = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, IsOutermostCompilingClass(type_index), dex_pc, - /*needs_access_check*/ can_throw); + /*needs_access_check*/ can_throw, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index)); current_block_->AddInstruction(load_class); HInstruction* cls = load_class; - if (!IsInitialized(resolved_class, type_index)) { + if (!IsInitialized(resolved_class)) { cls = new (arena_) HClinitCheck(load_class, dex_pc); current_block_->AddInstruction(cls); } @@ -929,39 +1060,52 @@ bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { return true; } -bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const { 
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) + SHARED_REQUIRES(Locks::mutator_lock_) { + return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); +} + +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const { if (cls.Get() == nullptr) { return false; } - if (GetOutermostCompilingClass() == cls.Get()) { + + // `CanAssumeClassIsLoaded` will return true if we're JITting, or will + // check whether the class is in an image for the AOT compilation. + if (cls->IsInitialized() && + compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { + return true; + } + + if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + return true; + } + + // TODO: We should walk over the inlined methods, but we don't pass + // that information to the builder. + if (IsSubClass(GetCompilingClass(), cls.Get())) { return true; } - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), type_index); - return cls->IsInitialized() && is_in_dex_cache; + + return false; } HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( uint32_t dex_pc, + ArtMethod* resolved_method, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<5> hs(soa.Self()); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Thread* self = Thread::Current(); + StackHandleScope<4> hs(self); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); - ArtMethod* resolved_method = compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, 
InvokeType::kStatic); - - DCHECK(resolved_method != nullptr); - - const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + self, *dex_compilation_unit_->GetDexFile()))); Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( - outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + outer_compilation_unit_->GetClassLinker()->FindDexCache( + self, outer_dex_file))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass())); // The index at which the method's class is stored in the DexCache's type array. uint32_t storage_index = DexFile::kDexNoIndex; @@ -979,36 +1123,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( HClinitCheck* clinit_check = nullptr; - if (!outer_class->IsInterface() - && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the outer class is the declaring class or a subclass - // of the declaring class, no class initialization is needed - // before the static method call. - // Note that in case of inlining, we do not need to add clinit checks - // to calls that satisfy this subclass check with any inlined methods. This - // will be detected by the optimization passes. + if (IsInitialized(resolved_method_class)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { - // If the method's class type index is available, check - // whether we should add an explicit class initialization - // check for its declaring class before the static method call. 
- - Handle<mirror::Class> cls(hs.NewHandle(resolved_method->GetDeclaringClass())); - if (IsInitialized(cls, storage_index)) { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; - } else { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = new (arena_) HLoadClass( - graph_->GetCurrentMethod(), - storage_index, - *dex_compilation_unit_->GetDexFile(), - is_outer_class, - dex_pc, - /*needs_access_check*/ false); - current_block_->AddInstruction(load_class); - clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); - current_block_->AddInstruction(clinit_check); - } + *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + storage_index, + outer_dex_file, + is_outer_class, + dex_pc, + /*needs_access_check*/ false, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index)); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); } return clinit_check; } @@ -1379,18 +1508,21 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } + bool is_in_cache = + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false); + /*needs_access_check*/ false, + is_in_cache); current_block_->AddInstruction(constant); HInstruction* cls = constant; Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); - if (!IsInitialized(klass, storage_index)) { + if (!IsInitialized(klass)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } @@ -1659,19 
+1791,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> dex_cache(hs.NewHandle( - dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); + dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index)); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. 
@@ -2769,15 +2902,21 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::CONST_STRING: { + uint32_t string_index = instruction.VRegB_21c(); + bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache( + *dex_file_, string_index); current_block_->AddInstruction( - new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_21c(), dex_pc)); + new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } case Instruction::CONST_STRING_JUMBO: { + uint32_t string_index = instruction.VRegB_31c(); + bool in_dex_cache = compiler_driver_->CanAssumeStringIsPresentInDexCache( + *dex_file_, string_index); current_block_->AddInstruction( - new (arena_) HLoadString(graph_->GetCurrentMethod(), instruction.VRegB_31c(), dex_pc)); + new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, dex_pc, in_dex_cache)); UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc); break; } @@ -2797,10 +2936,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access)); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index))); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 615b0cd738..26bf1cbc75 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -80,7 +80,8 @@ class HGraphBuilder : public ValueObject { can_use_baseline_for_string_init_(true), compilation_stats_(nullptr), interpreter_metadata_(nullptr), - 
dex_cache_(NullHandle<mirror::DexCache>()) {} + null_dex_cache_(), + dex_cache_(null_dex_cache_) {} bool BuildGraph(const DexFile::CodeItem& code); @@ -90,8 +91,9 @@ class HGraphBuilder : public ValueObject { static constexpr const char* kBuilderPassName = "builder"; - // The number of entries in a packed switch before we use a jump table. - static constexpr uint16_t kSmallSwitchThreshold = 5; + // The number of entries in a packed switch before we use a jump table or specified + // compare/jump series. + static constexpr uint16_t kSmallSwitchThreshold = 3; private: // Analyzes the dex instruction and adds HInstruction to the graph @@ -110,6 +112,7 @@ class HGraphBuilder : public ValueObject { const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t dex_pc); + void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations); HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const; HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc); @@ -305,17 +308,22 @@ class HGraphBuilder : public ValueObject { HClinitCheck* ProcessClinitCheckForInvoke( uint32_t dex_pc, + ArtMethod* method, uint32_t method_idx, - HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) + SHARED_REQUIRES(Locks::mutator_lock_); // Build a HNewInstance instruction. bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); - // Return whether the compiler can assume `cls` is initialized. `type_index` is the index - // of the class in the outer dex file. - bool IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const + // Return whether the compiler can assume `cls` is initialized. + bool IsInitialized(Handle<mirror::Class> cls) const SHARED_REQUIRES(Locks::mutator_lock_); + // Try to resolve a method using the class linker. Return null if a method could + // not be resolved. 
+ ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for @@ -365,6 +373,7 @@ class HGraphBuilder : public ValueObject { const uint8_t* interpreter_metadata_; // Dex cache for dex_file_. + ScopedNullHandle<mirror::DexCache> null_dex_cache_; Handle<mirror::DexCache> dex_cache_; DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0baa0e30dc..53d3615a41 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -246,10 +246,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) InitLocationsBaseline(current); } DCHECK(CheckTypeConsistency(current)); - uintptr_t native_pc_begin = GetAssembler()->CodeSize(); current->Accept(instruction_visitor); - uintptr_t native_pc_end = GetAssembler()->CodeSize(); - RecordNativeDebugInfo(current->GetDexPc(), native_pc_begin, native_pc_end); } } @@ -926,17 +923,6 @@ void CodeGenerator::BuildStackMaps(MemoryRegion region) { stack_map_stream_.FillIn(region); } -void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc, - uintptr_t native_pc_begin, - uintptr_t native_pc_end) { - if (compiler_options_.GetGenerateDebugInfo() && - dex_pc != kNoDexPc && - native_pc_begin != native_pc_end) { - src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), - static_cast<int32_t>(dex_pc)})); - } -} - void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 114d97be94..eade05d7b6 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,8 +269,6 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. 
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); - // Record additional native to dex mappings for native debugging/profiling tools. - void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -452,10 +450,6 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; - const ArenaVector<SrcMapElem>& GetSrcMappingTable() const { - return src_map_; - } - protected: // Method patch info used for recording locations of required linker patches and // target methods. The target method can be used for various purposes, whether for @@ -498,7 +492,6 @@ class CodeGenerator { stats_(stats), graph_(graph), compiler_options_(compiler_options), - src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_slow_path_(nullptr), current_block_index_(0), @@ -616,8 +609,6 @@ class CodeGenerator { HGraph* const graph_; const CompilerOptions& compiler_options_; - // Native to dex_pc map used for native debugging/profiling tools. - ArenaVector<SrcMapElem> src_map_; ArenaVector<SlowPathCode*> slow_paths_; // The current slow path that we're generating code for. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cf6f7e3338..58feb67a58 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -59,7 +59,7 @@ static constexpr SRegister kFpuCalleeSaves[] = // S registers. Therefore there is no need to block it. 
static constexpr DRegister DTMP = D31; -static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() @@ -77,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -101,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -123,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); @@ -179,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode { Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -214,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode { ? 
QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -260,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); @@ -351,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -393,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -712,7 +724,9 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(), + 
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); } @@ -1604,7 +1618,15 @@ void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } -void LocationsBuilderARM::VisitCondition(HCondition* cond) { +void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + +void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. @@ -1635,7 +1657,7 @@ void LocationsBuilderARM::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1692,83 +1714,83 @@ void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { } void LocationsBuilderARM::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - 
VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLocal(HLocal* local) { @@ -1910,10 +1932,18 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok codegen_->GetAssembler(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + 
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. + if (invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { @@ -2410,6 +2440,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2418,6 +2449,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2463,6 +2495,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; case Primitive::kPrimDouble: @@ -2985,6 +3018,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } @@ -2999,6 +3033,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -3127,22 +3162,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(R1, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case 
Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -3203,6 +3242,147 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) } } +void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) { + Register in = locations->InAt(0).AsRegister<Register>(); + Location rhs = locations->InAt(1); + Register out = locations->Out().AsRegister<Register>(); + + if (rhs.IsConstant()) { + // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], + // so map all rotations to a +ve. equivalent in that range. + // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) + uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F; + if (rot) { + // Rotate, mapping left rotations to right equivalents if necessary. + // (e.g. left by 2 bits == right by 30.) + __ Ror(out, in, rot); + } else if (out != in) { + __ Mov(out, in); + } + } else { + __ Ror(out, in, rhs.AsRegister<Register>()); + } +} + +// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer +// rotates by swapping input regs (effectively rotating by the first 32-bits of +// a larger rotation) or flipping direction (thus treating larger right/left +// rotations as sub-word sized rotations in the other direction) as appropriate. 
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { + Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Location rhs = locations->InAt(1); + Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); + + if (rhs.IsConstant()) { + uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant()); + // Map all rotations to +ve. equivalents on the interval [0,63]. + rot &= kMaxLongShiftValue; + // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate + // logic below to a simple pair of binary orr. + // (e.g. 34 bits == in_reg swap + 2 bits right.) + if (rot >= kArmBitsPerWord) { + rot -= kArmBitsPerWord; + std::swap(in_reg_hi, in_reg_lo); + } + // Rotate, or mov to out for zero or word size rotations. + if (rot != 0u) { + __ Lsr(out_reg_hi, in_reg_hi, rot); + __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot)); + __ Lsr(out_reg_lo, in_reg_lo, rot); + __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot)); + } else { + __ Mov(out_reg_lo, in_reg_lo); + __ Mov(out_reg_hi, in_reg_hi); + } + } else { + Register shift_right = locations->GetTemp(0).AsRegister<Register>(); + Register shift_left = locations->GetTemp(1).AsRegister<Register>(); + Label end; + Label shift_by_32_plus_shift_right; + + __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); + __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6); + __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep); + __ b(&shift_by_32_plus_shift_right, CC); + + // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). + // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). 
+ __ Lsl(out_reg_hi, in_reg_hi, shift_left); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ Lsr(shift_left, in_reg_hi, shift_right); + __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); + __ b(&end); + + __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. + // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). + // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). + __ Lsr(out_reg_hi, in_reg_hi, shift_right); + __ Lsl(out_reg_lo, in_reg_lo, shift_left); + __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); + __ Lsr(out_reg_lo, in_reg_lo, shift_right); + __ Lsl(shift_right, in_reg_hi, shift_left); + __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); + + __ Bind(&end); + } +} +void LocationsBuilderARM::HandleRotate(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + switch (ror->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + if (ror->InputAt(1)->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + } +} + +void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + 
Primitive::Type type = ror->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + HandleIntegerRotate(locations); + break; + } + case Primitive::kPrimLong: { + HandleLongRotate(locations); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitRor(HRor* op) { + HandleRotate(op); +} + +void InstructionCodeGeneratorARM::VisitRor(HRor* op) { + HandleRotate(op); +} + void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); @@ -3437,6 +3617,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -3458,6 +3639,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -4330,7 +4512,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for read barrier too. 
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4947,6 +5129,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -4968,7 +5151,6 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, @@ -4987,14 +5169,19 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, out, cache_offset); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5029,16 +5216,15 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( } void LocationsBuilderARM::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = (!load->IsInDexCache() || 
kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); @@ -5069,8 +5255,12 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ LoadFromOffset(kLoadWord, out, out, cache_offset); } - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + if (!load->IsInDexCache()) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } static int32_t GetExceptionTlsOffset() { @@ -5107,6 +5297,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -5547,6 +5738,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { 
HandleBitwiseOperation(instruction, AND); } @@ -5784,16 +5980,6 @@ void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { - if (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) { - // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod. - return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; - } if (desired_dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { const DexFile& outer_dex_file = GetGraph()->GetDexFile(); @@ -5816,6 +6002,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr return desired_dispatch_info; } +Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. 
+ SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // For better instruction scheduling we load the direct code pointer before the method pointer. switch (invoke->GetCodePtrLocation()) { @@ -5847,11 +6059,15 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadLiteral(temp.AsRegister<Register>(), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement this type. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). 
- LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HArmDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; @@ -5909,12 +6125,16 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp Register temp = temp_location.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - DCHECK(receiver.IsRegister()); // /* HeapReference<Class> */ temp = receiver->klass_ - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); + __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. 
@@ -5936,7 +6156,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); - size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size(); + size_t size = + method_patches_.size() + + call_patches_.size() + + relative_call_patches_.size() + + /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -5962,6 +6186,28 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche info.target_method.dex_file, info.target_method.dex_method_index)); } + for (const auto& pair : dex_cache_arrays_base_labels_) { + HArmDexCacheArraysBase* base = pair.first; + const DexCacheArraysBaseLabels* labels = &pair.second; + const DexFile& dex_file = base->GetDexFile(); + size_t base_element_offset = base->GetElementOffset(); + DCHECK(labels->add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position()); + // Add MOVW patch. + DCHECK(labels->movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + // Add MOVT patch. 
+ DCHECK(labels->movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + } } Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method, @@ -6012,7 +6258,7 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold && + if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold && codegen_->GetAssembler()->IsThumb()) { locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base. if (switch_instr->GetStartValue() != 0) { @@ -6028,12 +6274,30 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) { + if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) { // Create a series of compare/jumps. + Register temp_reg = IP; + // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store + // the immediate, because IP is used as the destination register. For the other + // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant, + // and they can be encoded in the instruction without making use of IP register. 
+ __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - GenerateCompareWithImmediate(value_reg, lower_bound + i); - __ b(codegen_->GetLabelOf(successors[i]), EQ); + // Jump to successors[0] if value == lower_bound. + __ b(codegen_->GetLabelOf(successors[0]), EQ); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ AddConstantSetFlags(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + GenerateCompareWithImmediate(temp_reg, 1); + __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } // And the default for any other value. 
@@ -6073,6 +6337,23 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } } +void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); + codegen_->AddDexCacheArraysBase(base); +} + +void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + Register base_reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base); + __ BindTrackedLabel(&labels->movw_label); + __ movw(base_reg, 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(base_reg, 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(base_reg, base_reg, ShifterOperand(PC)); +} + void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { DCHECK(type == Primitive::kPrimVoid); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 89de4f801d..b7c58e1248 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -170,6 +170,10 @@ class LocationsBuilderARM : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode); + void HandleCondition(HCondition* condition); + void HandleIntegerRotate(LocationSummary* locations); + void HandleLongRotate(LocationSummary* locations); + void HandleRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -213,6 +217,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateOrrConst(Register out, Register first, uint32_t value); void 
GenerateEorConst(Register out, Register first, uint32_t value); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleCondition(HCondition* condition); + void HandleIntegerRotate(LocationSummary* locations); + void HandleLongRotate(LocationSummary* locations); + void HandleRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateWideAtomicStore(Register addr, uint32_t offset, @@ -373,6 +381,31 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // The PC-relative base address is loaded with three instructions, MOVW+MOVT + // to load the offset to base_reg and then ADD base_reg, PC. The offset is + // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we + // currently emit these 3 instructions together, instruction scheduling could + // split this sequence apart, so we keep separate labels for each of them. + struct DexCacheArraysBaseLabels { + DexCacheArraysBaseLabels() = default; + DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default; + + Label movw_label; + Label movt_label; + Label add_pc_label; + }; + + void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) { + DexCacheArraysBaseLabels labels; + dex_cache_arrays_base_labels_.Put(base, std::move(labels)); + } + + DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) { + auto it = dex_cache_arrays_base_labels_.find(base); + DCHECK(it != dex_cache_arrays_base_labels_.end()); + return &it->second; + } + // Generate a read barrier for a heap reference within `instruction`. 
// // A read barrier for an object reference read from the heap is @@ -419,7 +452,12 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*, + DexCacheArraysBaseLabels, + std::less<HArmDexCacheArraysBase*>>; Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); @@ -441,6 +479,8 @@ class CodeGeneratorARM : public CodeGenerator { // Using ArenaDeque<> which retains element addresses on push/emplace_back(). ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 04955ddd23..b49f42b6c8 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces) namespace art { +template<class MirrorType> +class GcRoot; + namespace arm64 { using helpers::CPURegisterFrom; @@ -68,10 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; -// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -431,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = InputRegisterAt(instruction_, 0); - Register temp = WRegisterFrom(locations->GetTemp(0)); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ Ldr(temp, HeapOperand(obj, class_offset)); - arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -454,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, - const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -494,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -551,7 +546,7 @@ class 
ArraySetSlowPathARM64 : public SlowPathCodeARM64 { void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { uint32_t num_entries = switch_instr_->GetNumEntries(); - DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); // We are about to use the assembler to place literals directly. Make sure we have enough // underlying code buffer and we have generated the jump table with right size. @@ -571,6 +566,271 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ Ldr(out, HeapOperand(out, class_offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + + // Note: In the case of a HArrayGet instruction, when the base + // address is a HArm64IntermediateAddress instruction, it does not + // point to the array object itself, but to an offset within this + // object. However, the read barrier entry point needs the array + // object address to be passed as first argument. So we + // temporarily set back `obj_` to that address, and restore its + // initial value later. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Sub(obj_reg, obj_reg, offset_); + } + + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. 
+ Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); + if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { + // We are about to change the value of `index_reg` (see the + // calls to vixl::MacroAssembler::Lsl and + // vixl::MacroAssembler::Mov below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg.W(), index_reg); + index_reg = free_reg; + index = LocationFrom(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. 
+ } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Add(index_reg, index_reg, Operand(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
+ InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + LocationFrom(calling_convention.GetRegisterAt(0)), + type, + nullptr); + parallel_move.AddMove(obj_, + LocationFrom(calling_convention.GetRegisterAt(1)), + type, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); + } + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Restore the value of `obj_` when it corresponds to a + // HArm64IntermediateAddress instruction. 
+ if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Add(obj_reg, obj_reg, offset_); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(XRegisterFrom(ref_).code()); + size_t obj = static_cast<int>(XRegisterFrom(obj_).code()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return Register(VIXLRegCodeFromART(i), kXRegSize); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); +}; + +// Slow path generating a read barrier for a GC root. 
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // The argument of the ReadBarrierForRootSlow is not a managed + // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; + // thus we need a 64-bit move here, and we cannot use + // + // arm64_codegen->MoveLocation( + // LocationFrom(calling_convention.GetRegisterAt(0)), + // root_, + // type); + // + // which would emit a 32-bit move, as `type` is a (32-bit wide) + // reference type (`Primitive::kPrimNot`). 
+ __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -1401,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. 
+ locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -1436,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W()); + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); } } @@ -1515,6 +1791,17 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { __ Orr(dst, lhs, rhs); } else if (instr->IsSub()) { __ Sub(dst, lhs, rhs); + } else if (instr->IsRor()) { + if (rhs.IsImmediate()) { + uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1); + __ Ror(dst, lhs, shift); + } else { + // Ensure shift distance is in the same size register as the result. If + // we are rotating a long and the shift comes in a w register originally, + // we don't need to sxtw for use as an x since the shift distances are + // all & reg_bits - 1. 
+ __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); + } } else { DCHECK(instr->IsXor()); __ Eor(dst, lhs, rhs); @@ -1613,6 +1900,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + if (instruction->GetInstrKind() == HInstruction::kNeg) { + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + Primitive::Type type = instruction->GetType(); + HInstruction::InstructionKind kind = instruction->GetInstrKind(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + Register out = OutputRegister(instruction); + Register left; + if (kind != HInstruction::kNeg) { + left = InputRegisterAt(instruction, 0); + } + // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // shifter operand operation, the IR generating `right_reg` (input to the type + // conversion) can have a different type from the current instruction's type, + // so we manually indicate the type. + Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); + int64_t shift_amount = (type == Primitive::kPrimInt) + ? 
static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue) + : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue); + + Operand right_operand(0); + + HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); + } else { + right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + } + + // Logical binary operations do not support extension operations in the + // operand. Note that VIXL would still manage if it was passed by generating + // the extension as a separate instruction. + // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. + DCHECK(!right_operand.IsExtendedRegister() || + (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && + kind != HInstruction::kNeg)); + switch (kind) { + case HInstruction::kAdd: + __ Add(out, left, right_operand); + break; + case HInstruction::kAnd: + __ And(out, left, right_operand); + break; + case HInstruction::kNeg: + DCHECK(instruction->InputAt(0)->AsConstant()->IsZero()); + __ Neg(out, right_operand); + break; + case HInstruction::kOr: + __ Orr(out, left, right_operand); + break; + case HInstruction::kSub: + __ Sub(out, left, right_operand); + break; + case HInstruction::kXor: + __ Eor(out, left, right_operand); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << kind; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1628,23 +1991,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) 
{ + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. 
+ vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); - Location index = instruction->GetLocations()->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); + LocationSummary* locations = instruction->GetLocations(); + Location index = locations->InAt(1); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); CPURegister dest = OutputCPURegister(instruction); @@ -1676,8 +2091,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->Load(type, dest, source); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (instruction->GetType() == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(dest.W()); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + Location out = locations->Out(); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + // Note: when `obj_loc` is a HArm64IntermediateAddress, it does + // not contain the base address of the array object, which is + // needed by the read barrier entry point. So the read barrier + // slow path will temporarily set back `obj_loc` to the right + // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode). 
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + } } } @@ -1695,12 +2124,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -1710,7 +2146,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -1724,7 +2160,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { BlockPoolsScope block_pools(masm); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); if 
(index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); destination = HeapOperand(array, offset); @@ -1774,7 +2210,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -1789,26 +2225,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - Register temp2 = temps.AcquireSameSizeAs(array); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, component_offset)); - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ Cmp(temp, temp2); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::Label do_put; - __ B(eq, &do_put); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, super_offset)); - // No need to unpoison, we're comparing against null. 
- __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ Ldr(temp, HeapOperand(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); + // + // __ Cmp(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ B(slow_path->GetEntryLabel()); } else { - __ B(ne, slow_path->GetEntryLabel()); + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. 
+ __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); } - temps.Release(temp2); } if (kPoisonHeapReferences) { @@ -1824,7 +2300,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } __ Str(source, destination); - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -1951,7 +2427,7 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { } } -void LocationsBuilderARM64::VisitCondition(HCondition* instruction) { +void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { @@ -1971,7 +2447,7 @@ void LocationsBuilderARM64::VisitCondition(HCondition* instruction) { } } -void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { +void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { if (!instruction->NeedsMaterialization()) { return; } @@ -2019,8 +2495,8 @@ void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { M(Above) \ M(AboveOrEqual) #define DEFINE_CONDITION_VISITORS(Name) \ -void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); } \ -void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { 
VisitCondition(comp); } +void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ +void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION @@ -2473,6 +2949,14 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction); } @@ -2491,40 +2975,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); + Location out_loc = locations->Out(); Register out = OutputRegister(instruction); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); @@ -2540,15 +3028,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ Ldr(target, HeapOperand(obj.W(), class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ Ldr(out, HeapOperand(obj.W(), class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -2559,13 +3041,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
vixl::Label loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -2576,14 +3068,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -2594,14 +3096,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. 
vixl::Label exact_check; __ Cmp(out, cls); __ B(eq, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ Ldr(out, HeapOperand(out, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -2612,11 +3124,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ B(&done); break; } + case TypeCheckKind::kArrayCheck: { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -2625,13 +3138,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + 
// cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be clobbered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ B(&done); } @@ -2657,58 +3182,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); - Register temp; - if (!locations->WillCall()) { - temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - } - + Location temp_loc = locations->GetTemp(0); + Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCodeARM64* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARM64* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); vixl::Label done; // Avoid null check if we know obj is not null. 
@@ -2716,76 +3245,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(obj, &done); } - if (locations->WillCall()) { - __ Ldr(obj, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(obj); - } else { - __ Ldr(temp, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ Cmp(temp, cls); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ B(ne, slow_path->GetEntryLabel()); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::Label loop; + vixl::Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. 
- __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ Cbnz(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ Cmp(temp, cls); __ B(ne, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); __ B(eq, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ Cbnz(temp, &loop); - // Jump to the slow path to throw the exception. - __ B(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + vixl::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ Ldr(temp, HeapOperand(temp, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ Cbnz(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. 
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbz(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved + // and interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HCheckCast + // instruction (following the runtime calling convention), which + // might be clobbered by the potential first read barrier + // emission at the beginning of this method. 
+ __ B(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { @@ -2828,10 +3440,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + LocationSummary* locations = invoke->GetLocations(); + Register temp = XRegisterFrom(locations->GetTemp(0)); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); - Location receiver = invoke->GetLocations()->InAt(0); + Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -2843,14 +3456,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); + // /* HeapReference<Class> */ temp = temp->klass_ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2972,7 +3593,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } - // temp = current_method->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ Ldr(reg.X(), MemOperand(method_reg.X(), ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value())); @@ -3016,8 +3637,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok } void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
+ InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); Register temp = XRegisterFrom(temp_in); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); @@ -3027,8 +3652,15 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te BlockPoolsScope block_pools(GetVIXLAssembler()); DCHECK(receiver.IsRegister()); - __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an intermediate/temporary + // reference and because the current concurrent copying collector + // keeps the from-space memory intact/accessible until the end of the + // marking phase (the concurrent copying collector may not in the future). 
GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -3141,7 +3773,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0)); + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3151,30 +3784,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } + Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: 
We will need a read barrier here. - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3210,24 +3869,50 @@ void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UN } void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); - codegen_->AddSlowPath(slow_path); - + Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. - __ Cbz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } 
else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!load->IsInDexCache()) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + codegen_->AddSlowPath(slow_path); + __ Cbz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderARM64::VisitLocal(HLocal* local) { @@ -3260,7 +3945,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -3349,8 +4038,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -3375,7 +4062,6 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { @@ -3555,11 +4241,17 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); } } @@ -3589,6 +4281,14 @@ void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATT codegen_->GenerateFrameExit(); } +void LocationsBuilderARM64::VisitRor(HRor* ror) { + HandleBinaryOp(ror); +} + +void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { + HandleBinaryOp(ror); +} + void LocationsBuilderARM64::VisitShl(HShl* shl) { HandleShift(shl); } @@ -3626,6 +4326,7 @@ void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) { default: LOG(FATAL) << "Unimplemented local type " << field_type; + UNREACHABLE(); } } @@ -3799,9 +4500,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { - __ Ubfx(output, source, 0, result_size * kBitsPerByte); - } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -3810,9 +4509,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. 
__ Mov(output.W(), source.W()); - } else if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + __ Ubfx(output, + output.IsX() ? source.X() : source.W(), + 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } @@ -3889,20 +4590,29 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst // ranges and emit the tables only as required. static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; - if (num_entries < kPackedSwitchJumpTableThreshold || + if (num_entries <= kPackedSwitchCompareJumpThreshold || // Current instruction id is an upper bound of the number of HIRs in the graph. GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { // Create a series of compare/jumps. + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireW(); + __ Subs(temp, value_reg, Operand(lower_bound)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); - } else { - __ Cmp(value_reg, Operand(case_value)); - __ B(eq, succ); - } + // Jump to successors[0] if value == lower_bound. + __ B(eq, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Subs(temp, temp, Operand(2)); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. 
+ __ B(lo, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ B(eq, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Cmp(temp, Operand(1)); + __ B(eq, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. @@ -3947,6 +4657,82 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } +void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); + } +} + +void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. 
+ SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 881afcc123..0e90ac6345 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -215,6 +215,7 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); @@ -257,6 +258,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { void HandleFieldSet(HInstruction* instruction); void HandleFieldGet(HInstruction* instruction); void HandleInvoke(HInvoke* instr); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* instr); CodeGeneratorARM64* const codegen_; @@ -424,6 +426,51 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. 
+ // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. 
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index f3178bd77c..07efdee22d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -415,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -461,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickDeoptimize)); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -1957,11 +1956,8 @@ void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { Primitive::Type in_type = compare->InputAt(0)->GetType(); - LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) - ? 
LocationSummary::kCall - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); switch (in_type) { case Primitive::kPrimLong: @@ -1972,13 +1968,11 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { break; case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - } default: LOG(FATAL) << "Unexpected type for compare operation " << in_type; @@ -1987,7 +1981,10 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); + Register res = locations->Out().AsRegister<Register>(); Primitive::Type in_type = instruction->InputAt(0)->GetType(); + bool gt_bias = instruction->IsGtBias(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); // 0 if: left == right // 1 if: left > right @@ -1995,7 +1992,6 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { switch (in_type) { case Primitive::kPrimLong: { MipsLabel done; - Register res = locations->Out().AsRegister<Register>(); Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); Register rhs_high = 
locations->InAt(1).AsRegisterPairHigh<Register>(); @@ -2012,45 +2008,82 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - int32_t entry_point_offset; - bool direct; - if (in_type == Primitive::kPrimFloat) { - if (instruction->IsGtBias()) { - entry_point_offset = QUICK_ENTRY_POINT(pCmpgFloat); - direct = IsDirectEntrypoint(kQuickCmpgFloat); + case Primitive::kPrimFloat: { + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + MipsLabel done; + if (isR6) { + __ CmpEqS(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - entry_point_offset = QUICK_ENTRY_POINT(pCmplFloat); - direct = IsDirectEntrypoint(kQuickCmplFloat); + __ CmpLtS(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } } else { - if (instruction->IsGtBias()) { - entry_point_offset = QUICK_ENTRY_POINT(pCmpgDouble); - direct = IsDirectEntrypoint(kQuickCmpgDouble); + if (gt_bias) { + __ ColtS(0, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1t(0, &done); + __ CeqS(0, lhs, rhs); + __ LoadConst32(res, 1); + __ Movt(res, ZERO, 0); } else { - entry_point_offset = QUICK_ENTRY_POINT(pCmplDouble); - direct = IsDirectEntrypoint(kQuickCmplDouble); + __ ColtS(0, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1t(0, &done); + __ CeqS(0, lhs, rhs); + __ LoadConst32(res, -1); + __ Movt(res, ZERO, 0); } } - codegen_->InvokeRuntime(entry_point_offset, - instruction, - instruction->GetDexPc(), - nullptr, - direct); - if (in_type == Primitive::kPrimFloat) { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); + __ Bind(&done); + break; + } + case Primitive::kPrimDouble: { + FRegister lhs = 
locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + MipsLabel done; + if (isR6) { + __ CmpEqD(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); + __ CmpLtD(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } } else { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); + if (gt_bias) { + __ ColtD(0, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1t(0, &done); + __ CeqD(0, lhs, rhs); + __ LoadConst32(res, 1); + __ Movt(res, ZERO, 0); } else { - CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); + __ ColtD(0, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1t(0, &done); + __ CeqD(0, lhs, rhs); + __ LoadConst32(res, -1); + __ Movt(res, ZERO, 0); } } + __ Bind(&done); break; } @@ -2059,163 +2092,233 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { } } -void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) { +void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + switch (instruction->InputAt(0)->GetType()) { + default: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + } if (instruction->NeedsMaterialization()) 
{ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } -void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) { +void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { if (!instruction->NeedsMaterialization()) { return; } - // TODO: generalize to long - DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong); + Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); Register dst = locations->Out().AsRegister<Register>(); + MipsLabel true_label; - Register lhs = locations->InAt(0).AsRegister<Register>(); - Location rhs_location = locations->InAt(1); + switch (type) { + default: + // Integer case. + GenerateIntCompare(instruction->GetCondition(), locations); + return; - Register rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + case Primitive::kPrimLong: + // TODO: don't use branches. + GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // TODO: don't use branches. + GenerateFpCompareAndBranch(instruction->GetCondition(), + instruction->IsGtBias(), + type, + locations, + &true_label); + break; + } + + // Convert the branches into the result. + MipsLabel done; + + // False case: result = 0. + __ LoadConst32(dst, 0); + __ B(&done); + + // True case: result = 1. 
+ __ Bind(&true_label); + __ LoadConst32(dst, 1); + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Move(out, ZERO); } else { - rhs_reg = rhs_location.AsRegister<Register>(); + if (imm == -1) { + __ Subu(out, ZERO, dividend); + } else if (out != dividend) { + __ Move(out, dividend); + } } +} - IfCondition if_cond = instruction->GetCondition(); +void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); - switch (if_cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (if_cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - break; + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); + DCHECK(IsPowerOfTwo(abs_imm)); + int 
ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } + } else { + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); } - if (if_cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } - break; + __ Subu(out, out, TMP); + } + } +} - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (if_cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (if_cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. 
- __ Xori(dst, dst, 1); - } - } - break; +void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); - case kCondB: - case kCondAE: - // Use sltiu instruction if rhs_imm is in range [0, 32767] or in - // [max_unsigned - 32767 = 0xffff8000, max_unsigned = 0xffffffff]. - if (use_imm && - (IsUint<15>(rhs_imm) || - IsUint<15>(rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) { - if (IsUint<15>(rhs_imm)) { - __ Sltiu(dst, lhs, rhs_imm); - } else { - // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended - // and then used as unsigned integer (range [0xffff8000, 0xffffffff]). - __ Sltiu(dst, lhs, rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16))); - } + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + + __ LoadConst32(TMP, magic); + if (isR6) { + __ MuhR6(TMP, dividend, TMP); + } else { + __ MultR2(dividend, TMP); + __ Mfhi(TMP); + } + if (imm > 0 && magic < 0) { + __ Addu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Subu(TMP, TMP, dividend); + } + + if (shift != 0) { + __ Sra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Sra(out, TMP, 31); + __ Subu(out, TMP, out); + } else { + __ Sra(AT, TMP, 31); + __ Subu(AT, TMP, AT); + __ LoadConst32(TMP, imm); + if (isR6) { + __ MulR6(TMP, AT, TMP); + } else { + __ MulR2(TMP, AT, TMP); + } + __ Subu(out, dividend, TMP); + } +} + +void 
InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Register out = locations->Out().AsRegister<Register>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + Register dividend = locations->InAt(0).AsRegister<Register>(); + Register divisor = second.AsRegister<Register>(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (instruction->IsDiv()) { + if (isR6) { + __ DivR6(out, dividend, divisor); } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (if_cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); + __ DivR2(out, dividend, divisor); } - break; - - case kCondBE: - case kCondA: - // Use sltiu instruction if rhs_imm is in range [0, 32766] or in - // [max_unsigned - 32767 - 1 = 0xffff7fff, max_unsigned - 1 = 0xfffffffe]. - // lhs <= rhs is simulated via lhs < rhs + 1. - if (use_imm && (rhs_imm != -1) && - (IsUint<15>(rhs_imm + 1) || - IsUint<15>(rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) { - if (IsUint<15>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. 
- __ Sltiu(dst, lhs, rhs_imm + 1); - } else { - // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended - // and then used as unsigned integer (range [0xffff8000, 0xffffffff] where rhs_imm - // is in range [0xffff7fff, 0xfffffffe] since lhs <= rhs is simulated via lhs < rhs + 1). - __ Sltiu(dst, lhs, rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16))); - } - if (if_cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } + } else { + if (isR6) { + __ ModR6(out, dividend, divisor); } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (if_cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } + __ ModR2(out, dividend, divisor); } - break; + } } } @@ -2230,7 +2333,7 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { switch (type) { case Primitive::kPrimInt: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -2259,20 +2362,11 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { Primitive::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - if (isR6) { - __ DivR6(dst, lhs, rhs); - } else { - __ DivR2(dst, lhs, rhs); - } + case Primitive::kPrimInt: + GenerateDivRemIntegral(instruction); break; - } 
case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), instruction, @@ -2419,6 +2513,627 @@ void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary) } } +void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond, + LocationSummary* locations) { + Register dst = locations->Out().AsRegister<Register>(); + Register lhs = locations->InAt(0).AsRegister<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<Register>(); + } + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. + __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm + 1); + if (cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. 
+ __ Xori(dst, dst, 1); + } + } + break; + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0xffff8000, 0xffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + if (cond == kCondAE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the sltu instruction but no sgeu. + __ Xori(dst, dst, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0xffff8000, 0xffffffff]. + __ Sltiu(dst, lhs, rhs_imm + 1); + if (cond == kCondA) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the sltiu instruction but no sgtiu. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + if (cond == kCondBE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the sltu instruction but no sleu. 
+ __ Xori(dst, dst, 1); + } + } + break; + } +} + +void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, + LocationSummary* locations, + MipsLabel* label) { + Register lhs = locations->InAt(0).AsRegister<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_reg = ZERO; + int32_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } else { + rhs_reg = rhs_location.AsRegister<Register>(); + } + + if (use_imm && rhs_imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqz(lhs, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnez(lhs, label); + break; + case kCondLT: + __ Bltz(lhs, label); + break; + case kCondGE: + __ Bgez(lhs, label); + break; + case kCondLE: + __ Blez(lhs, label); + break; + case kCondGT: + __ Bgtz(lhs, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ B(label); + break; + } + } else { + if (use_imm) { + // TODO: more efficient comparison with 16-bit constants without loading them into TMP. 
+ rhs_reg = TMP; + __ LoadConst32(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beq(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bne(lhs, rhs_reg, label); + break; + case kCondLT: + __ Blt(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bge(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bge(rhs_reg, lhs, label); + break; + case kCondGT: + __ Blt(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltu(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeu(lhs, rhs_reg, label); + break; + case kCondBE: + __ Bgeu(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltu(rhs_reg, lhs, label); + break; + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + MipsLabel* label) { + Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_high = ZERO; + Register rhs_low = ZERO; + int64_t imm = 0; + uint32_t imm_high = 0; + uint32_t imm_low = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); + imm_high = High32Bits(imm); + imm_low = Low32Bits(imm); + } else { + rhs_high = rhs_location.AsRegisterPairHigh<Register>(); + rhs_low = rhs_location.AsRegisterPairLow<Register>(); + } + + if (use_imm && imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Or(TMP, lhs_high, lhs_low); + __ Beqz(TMP, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Or(TMP, lhs_high, lhs_low); + __ Bnez(TMP, label); + break; + case kCondLT: + __ Bltz(lhs_high, label); + break; + case kCondGE: + __ Bgez(lhs_high, label); + break; + case kCondLE: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Bgeu(AT, TMP, label); + break; + case kCondGT: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Bltu(AT, 
TMP, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ B(label); + break; + } + } else if (use_imm) { + // TODO: more efficient comparison with constants without loading them into TMP/AT. + switch (cond) { + case kCondEQ: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondNE: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(TMP, TMP, AT); + __ Bnez(TMP, label); + break; + case kCondLT: + __ LoadConst32(TMP, imm_high); + __ Blt(lhs_high, TMP, label); + __ Slt(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, lhs_low, AT); + __ Blt(TMP, AT, label); + break; + case kCondGE: + __ LoadConst32(TMP, imm_high); + __ Blt(TMP, lhs_high, label); + __ Slt(TMP, lhs_high, TMP); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, lhs_low, AT); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondLE: + __ LoadConst32(TMP, imm_high); + __ Blt(lhs_high, TMP, label); + __ Slt(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, AT, lhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondGT: + __ LoadConst32(TMP, imm_high); + __ Blt(TMP, lhs_high, label); + __ Slt(TMP, lhs_high, TMP); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, AT, lhs_low); + __ Blt(TMP, AT, label); + break; + case kCondB: + __ LoadConst32(TMP, imm_high); + __ Bltu(lhs_high, TMP, label); + __ Sltu(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, lhs_low, AT); + __ Blt(TMP, AT, label); + break; + case kCondAE: + __ LoadConst32(TMP, imm_high); + __ Bltu(TMP, lhs_high, label); + __ Sltu(TMP, lhs_high, TMP); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, lhs_low, AT); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondBE: + __ LoadConst32(TMP, imm_high); + __ Bltu(lhs_high, TMP, 
label); + __ Sltu(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, AT, lhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondA: + __ LoadConst32(TMP, imm_high); + __ Bltu(TMP, lhs_high, label); + __ Sltu(TMP, lhs_high, TMP); + __ LoadConst32(AT, imm_low); + __ Sltu(AT, AT, lhs_low); + __ Blt(TMP, AT, label); + break; + } + } else { + switch (cond) { + case kCondEQ: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondNE: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(TMP, TMP, AT); + __ Bnez(TMP, label); + break; + case kCondLT: + __ Blt(lhs_high, rhs_high, label); + __ Slt(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Blt(TMP, AT, label); + break; + case kCondGE: + __ Blt(rhs_high, lhs_high, label); + __ Slt(TMP, lhs_high, rhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondLE: + __ Blt(lhs_high, rhs_high, label); + __ Slt(TMP, rhs_high, lhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondGT: + __ Blt(rhs_high, lhs_high, label); + __ Slt(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Blt(TMP, AT, label); + break; + case kCondB: + __ Bltu(lhs_high, rhs_high, label); + __ Sltu(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Blt(TMP, AT, label); + break; + case kCondAE: + __ Bltu(rhs_high, lhs_high, label); + __ Sltu(TMP, lhs_high, rhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondBE: + __ Bltu(lhs_high, rhs_high, label); + __ Sltu(TMP, rhs_high, lhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Or(TMP, TMP, AT); + __ Beqz(TMP, label); + break; + case kCondA: + __ Bltu(rhs_high, lhs_high, label); + __ Sltu(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Blt(TMP, AT, label); + 
break; + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + MipsLabel* label) { + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (type == Primitive::kPrimFloat) { + if (isR6) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + switch (cond) { + case kCondEQ: + __ CeqS(0, lhs, rhs); + __ Bc1t(0, label); + break; + case kCondNE: + __ CeqS(0, lhs, rhs); + __ Bc1f(0, label); + break; + case kCondLT: + if (gt_bias) { + __ ColtS(0, lhs, rhs); + } else { + __ CultS(0, lhs, rhs); + } + __ Bc1t(0, label); + break; + case kCondLE: + if (gt_bias) { + __ ColeS(0, lhs, rhs); + } else { + __ CuleS(0, lhs, rhs); + } + __ Bc1t(0, label); + break; + case kCondGT: + if (gt_bias) { + __ CultS(0, rhs, lhs); + } else { + __ ColtS(0, rhs, lhs); + } + __ Bc1t(0, label); + break; + case kCondGE: + if (gt_bias) { + __ CuleS(0, rhs, lhs); + } else { + __ ColeS(0, rhs, lhs); + } + __ Bc1t(0, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point 
condition"; + } + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + if (isR6) { + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + switch (cond) { + case kCondEQ: + __ CeqD(0, lhs, rhs); + __ Bc1t(0, label); + break; + case kCondNE: + __ CeqD(0, lhs, rhs); + __ Bc1f(0, label); + break; + case kCondLT: + if (gt_bias) { + __ ColtD(0, lhs, rhs); + } else { + __ CultD(0, lhs, rhs); + } + __ Bc1t(0, label); + break; + case kCondLE: + if (gt_bias) { + __ ColeD(0, lhs, rhs); + } else { + __ CuleD(0, lhs, rhs); + } + __ Bc1t(0, label); + break; + case kCondGT: + if (gt_bias) { + __ CultD(0, rhs, lhs); + } else { + __ ColtD(0, rhs, lhs); + } + __ Bc1t(0, label); + break; + case kCondGE: + if (gt_bias) { + __ CuleD(0, rhs, lhs); + } else { + __ ColeD(0, rhs, lhs); + } + __ Bc1t(0, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } + } +} + void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, MipsLabel* true_target, @@ -2455,7 +3170,7 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi // The condition instruction has been materialized, compare the output to 0. 
Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { + if (true_target == nullptr) { __ Beqz(cond_val.AsRegister<Register>(), false_target); } else { __ Bnez(cond_val.AsRegister<Register>(), true_target); @@ -2464,98 +3179,27 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + MipsLabel* branch_target = true_target; - Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>(); - Location rhs_location = condition->GetLocations()->InAt(1); - Register rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<Register>(); - } - - IfCondition if_cond; - MipsLabel* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); - non_fallthrough_target = false_target; - } else { - if_cond = condition->GetCondition(); - non_fallthrough_target = true_target; + branch_target = false_target; } - if (use_imm && rhs_imm == 0) { - switch (if_cond) { - case kCondEQ: - __ Beqz(lhs, non_fallthrough_target); - break; - case kCondNE: - __ Bnez(lhs, non_fallthrough_target); - break; - case kCondLT: - __ Bltz(lhs, non_fallthrough_target); - break; - case kCondGE: - __ Bgez(lhs, non_fallthrough_target); - break; - case kCondLE: - __ Blez(lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bgtz(lhs, non_fallthrough_target); - break; - case kCondB: - break; // always false - case kCondBE: - __ Beqz(lhs, 
non_fallthrough_target); // <= 0 if zero - break; - case kCondA: - __ Bnez(lhs, non_fallthrough_target); // > 0 if non-zero - break; - case kCondAE: - __ B(non_fallthrough_target); // always true - break; - } - } else { - if (use_imm) { - // TODO: more efficient comparison with 16-bit constants without loading them into TMP. - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (if_cond) { - case kCondEQ: - __ Beq(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bne(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Blt(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bge(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bge(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Blt(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltu(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeu(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeu(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltu(rhs_reg, lhs, non_fallthrough_target); - break; - } + switch (type) { + default: + GenerateIntCompareAndBranch(if_cond, locations, branch_target); + break; + case Primitive::kPrimLong: + GenerateLongCompareAndBranch(if_cond, locations, branch_target); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; } } @@ -2600,6 +3244,14 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const 
FieldInfo& field_info) { Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble); @@ -2638,6 +3290,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2668,8 +3321,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (is_volatile && load_type == kLoadDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2692,21 +3344,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + if (obj == dst) { + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst, obj, offset); + } else { + __ LoadFromOffset(kLoadWord, dst, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); + __ LoadFromOffset(load_type, dst, obj, offset); } - __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); } else { 
DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadSFromOffset(dst, obj, offset); } else { - __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadDFromOffset(dst, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (is_volatile) { @@ -2752,6 +3417,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2782,8 +3448,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (is_volatile && store_type == kStoreDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. 
__ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2806,21 +3471,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); + Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); + __ StoreToOffset(kStoreWord, src, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); + __ StoreToOffset(store_type, src, obj, offset); } - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreSToOffset(src, obj, offset); } else { - __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreDToOffset(src, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } // TODO: memory barriers? 
@@ -3170,6 +3842,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { cls->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3181,21 +3854,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3232,24 +3910,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNU } void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = load->IsInDexCache() + ? 
LocationSummary::kNoCall + : LocationSummary::kCallOnSlowPath; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); Register out = locations->Out().AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - __ Beqz(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + + if (!load->IsInDexCache()) { + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderMIPS::VisitLocal(HLocal* local) { @@ -3641,7 +4323,7 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -3671,21 +4353,11 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { Primitive::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch 
(type) { - case Primitive::kPrimInt: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - if (isR6) { - __ ModR6(dst, lhs, rhs); - } else { - __ ModR2(dst, lhs, rhs); - } + case Primitive::kPrimInt: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, @@ -3700,7 +4372,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmodf)); - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { @@ -3708,7 +4380,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmod)); - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } default: @@ -3742,6 +4414,16 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN codegen_->GenerateFrameExit(); } +void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + void LocationsBuilderMIPS::VisitShl(HShl* shl) { HandleShift(shl); } @@ -4118,83 +4800,83 @@ void InstructionCodeGeneratorMIPS::VisitBoundType(HBoundType* instruction ATTRIB } void LocationsBuilderMIPS::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void 
InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) { - 
VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) { @@ -4223,19 +4905,31 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr HBasicBlock* default_block = switch_instr->GetDefaultBlock(); // Create a set of compare/jumps. + Register temp_reg = TMP; + __ Addiu32(temp_reg, value_reg, -lower_bound); + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. + __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; ++i) { - int32_t case_value = lower_bound + i; - MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqz(value_reg, successor_label); - } else { - __ LoadConst32(TMP, case_value); - __ Beq(value_reg, TMP, successor_label); - } + // Jump to successors[0] if value == lower_bound. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); } - // Insert the default branch for every other value. + // And the default for any other value. 
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { __ B(codegen_->GetLabelOf(default_block)); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index e3a2cb40ef..38302ad315 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -185,6 +185,7 @@ class LocationsBuilderMIPS : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -220,15 +221,32 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateIntCompare(IfCondition cond, LocationSummary* locations); + void GenerateIntCompareAndBranch(IfCondition cond, + LocationSummary* locations, + MipsLabel* label); + void GenerateLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + MipsLabel* label); + void GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + MipsLabel* label); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, MipsLabel* true_target, MipsLabel* false_target); + void 
DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); MipsAssembler* const assembler_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 61008599ee..05834ff063 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -27,8 +27,8 @@ #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is 
null, the label to branch to after the suspend check. - Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { instruction_, dex_pc, this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. 
+ for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -939,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1048,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? 
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1079,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1583,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; } @@ -1669,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. - if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -1729,11 +1752,7 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { Primitive::Type in_type = compare->InputAt(0)->GetType(); - LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) - ? 
LocationSummary::kCall - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); switch (in_type) { case Primitive::kPrimLong: @@ -1743,13 +1762,11 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { break; case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - } default: LOG(FATAL) << "Unexpected type for compare operation " << in_type; @@ -1758,14 +1775,15 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); + GpuRegister res = locations->Out().AsRegister<GpuRegister>(); Primitive::Type in_type = instruction->InputAt(0)->GetType(); + bool gt_bias = instruction->IsGtBias(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); @@ -1780,22 +1798,52 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { rhs = rhs_location.AsRegister<GpuRegister>(); } __ Slt(TMP, lhs, rhs); - __ Slt(dst, rhs, lhs); - __ 
Subu(dst, dst, TMP); + __ Slt(res, rhs, lhs); + __ Subu(res, res, TMP); + break; + } + + case Primitive::kPrimFloat: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqS(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); + } else { + __ CmpLtS(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); + } + __ Bind(&done); break; } - case Primitive::kPrimFloat: case Primitive::kPrimDouble: { - int32_t entry_point_offset; - if (in_type == Primitive::kPrimFloat) { - entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat) - : QUICK_ENTRY_POINT(pCmplFloat); + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqD(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - entry_point_offset = instruction->IsGtBias() ? 
QUICK_ENTRY_POINT(pCmpgDouble) - : QUICK_ENTRY_POINT(pCmplDouble); + __ CmpLtD(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } - codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + __ Bind(&done); break; } @@ -1804,143 +1852,67 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { } } -void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) { +void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + switch (instruction->InputAt(0)->GetType()) { + default: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + } if (instruction->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } -void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { +void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { if (!instruction->NeedsMaterialization()) { return; } - // TODO: generalize to long - DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong); - + Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); + Mips64Label true_label; - GpuRegister rhs_reg = ZERO; - int64_t 
rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond = instruction->GetCondition(); - - switch (if_cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (if_cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - break; + switch (type) { + default: + // Integer case. + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + return; + case Primitive::kPrimLong: + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + return; - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (if_cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // TODO: don't use branches. + GenerateFpCompareAndBranch(instruction->GetCondition(), + instruction->IsGtBias(), + type, + locations, + &true_label); break; + } - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (if_cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. 
- __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (if_cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; + // Convert the branches into the result. + Mips64Label done; - case kCondB: - case kCondAE: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) { - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (if_cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; + // False case: result = 0. + __ LoadConst32(dst, 0); + __ Bc(&done); - case kCondBE: - case kCondA: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Sltiu(dst, lhs, rhs_imm + 1); - if (if_cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (if_cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } - } - break; - } + // True case: result = 1. + __ Bind(&true_label); + __ LoadConst32(dst, 1); + __ Bind(&done); } void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -2264,7 +2236,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. 
We don't need to perform // any check, so simply fall through. @@ -2316,7 +2288,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2339,10 +2311,333 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } } +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* locations) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. 
+ __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + if (cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. + __ Xori(dst, dst, 1); + } + } + break; + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + if (cond == kCondAE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the sltu instruction but no sgeu. + __ Xori(dst, dst, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm_plus_one); + if (cond == kCondA) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the sltiu instruction but no sgtiu. 
+ __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + if (cond == kCondBE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the sltu instruction but no sleu. + __ Xori(dst, dst, 1); + } + } + break; + } +} + +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (use_imm && rhs_imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqzc(lhs, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnezc(lhs, label); + break; + case kCondLT: + __ Bltzc(lhs, label); + break; + case kCondGE: + __ Bgezc(lhs, label); + break; + case kCondLE: + __ Blezc(lhs, label); + break; + case kCondGT: + __ Bgtzc(lhs, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ Bc(label); + break; + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, label); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, label); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeuc(lhs, 
rhs_reg, label); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, label); + break; + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case 
kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + Mips64Label* true_target, + Mips64Label* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -2352,12 +2647,12 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // Constant condition, statically compared against 1. if (cond->AsIntConstant()->IsOne()) { if (true_target != nullptr) { - __ B(true_target); + __ Bc(true_target); } } else { DCHECK(cond->AsIntConstant()->IsZero()); if (false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } return; @@ -2384,127 +2679,34 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. 
HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Mips64Label* branch_target = true_target; - GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = condition->GetLocations()->InAt(1); - GpuRegister rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond; - Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); - non_fallthrough_target = false_target; - } else { - if_cond = condition->GetCondition(); - non_fallthrough_target = true_target; - } - - if (use_imm && rhs_imm == 0) { - switch (if_cond) { - case kCondEQ: - __ Beqzc(lhs, non_fallthrough_target); - break; - case kCondNE: - __ Bnezc(lhs, non_fallthrough_target); - break; - case kCondLT: - __ Bltzc(lhs, non_fallthrough_target); - break; - case kCondGE: - __ Bgezc(lhs, non_fallthrough_target); - break; - case kCondLE: - __ Blezc(lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bgtzc(lhs, non_fallthrough_target); - break; - case kCondB: - break; // always false - case kCondBE: - __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero - break; - case kCondA: - __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero - break; - case kCondAE: - __ B(non_fallthrough_target); // always true - break; - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. 
- if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(non_fallthrough_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } - } + branch_target = false_target; + } + + switch (type) { + default: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); + break; + case Primitive::kPrimLong: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; } } // If neither branch falls through (case 3), the conditional branch to `true_target` // was already emitted (case 2) and we need to emit a jump to `false_target`. 
if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } @@ -2518,9 +2720,9 @@ void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? nullptr : codegen_->GetLabelOf(true_successor); - Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } @@ -2543,6 +2745,14 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info ATTRIBUTE_UNUSED) { LocationSummary* locations = @@ -2695,7 +2905,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. 
@@ -2790,6 +3000,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2924,13 +3135,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2947,6 +3159,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2970,8 +3183,13 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi } void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
+ InvokeDexCallingConvention calling_convention; + GpuRegister receiver = calling_convention.GetRegisterAt(0); + GpuRegister temp = temp_location.AsRegister<GpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); @@ -2979,8 +3197,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize); // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); + __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); @@ -2988,6 +3205,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -3016,6 +3234,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3027,22 +3246,27 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadDoubleword, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); - __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); + __ LoadFromOffset( + kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); // TODO: We will need a read barrier here. 
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3079,26 +3303,31 @@ void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_U } void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = load->IsInDexCache() + ? 
LocationSummary::kNoCall + : LocationSummary::kCallOnSlowPath; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); + __ LoadFromOffset( + kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); // TODO: We will need a read barrier here. 
- __ Beqzc(out, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + + if (!load->IsInDexCache()) { + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Beqzc(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderMIPS64::VisitLocal(HLocal* local) { @@ -3132,7 +3361,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderMIPS64::VisitMul(HMul* mul) { @@ -3451,6 +3684,11 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: @@ -3484,6 +3722,16 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ codegen_->GenerateFrameExit(); } +void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + void LocationsBuilderMIPS64::VisitShl(HShl* shl) { HandleShift(shl); } @@ -3760,6 +4008,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type == Primitive::kPrimFloat) { + 
CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); @@ -3775,6 +4028,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + } + } else { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3817,83 +4083,83 @@ void InstructionCodeGeneratorMIPS64::VisitBoundType(HBoundType* instruction ATTR } void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void 
InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) { @@ -3922,22 +4188,39 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. + // Create a set of compare/jumps. 
+ GpuRegister temp_reg = TMP; + if (IsInt<16>(-lower_bound)) { + __ Addiu(temp_reg, value_reg, -lower_bound); + } else { + __ LoadConst32(AT, -lower_bound); + __ Addu(temp_reg, value_reg, AT); + } + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. + __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqzc(value_reg, succ); - } else { - __ LoadConst32(TMP, case_value); - __ Beqc(value_reg, TMP, succ); - } + // Jump to successors[0] if value == lower_bound. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. 
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index a078dd1819..60ff96dc43 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -158,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -189,6 +189,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -224,6 +225,7 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -231,12 +233,22 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void 
GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + Mips64Label* true_target, + Mips64Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label); + void GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; @@ -265,7 +277,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -298,12 +310,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -349,8 +361,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. 
- Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. + Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 53e33bf5c1..fd18917842 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -42,7 +42,6 @@ namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = EAX; - static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; static constexpr int kC2ConditionMask = 0x400; @@ -67,6 +66,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -93,6 +93,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -152,6 +153,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -177,6 +179,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -222,6 +225,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); 
RestoreLiveRegisters(codegen, locations); @@ -257,6 +261,11 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -368,6 +377,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -410,6 +420,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -422,6 +433,56 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; +// Slow path marking an object during a read barrier. 
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode { + public: + ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + x86_codegen->Move32(out_, Location::RegisterLocation(EAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { public: @@ -443,7 +504,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { // to be instrumented, e.g.: // // __ movl(out, Address(out, offset)); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -459,7 +520,9 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -601,14 +664,18 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); 
SaveLiveRegisters(codegen, locations); @@ -1487,7 +1554,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio Location lhs = condition->GetLocations()->InAt(0); Location rhs = condition->GetLocations()->InAt(1); - // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). + // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition). if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { @@ -1549,6 +1616,14 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderX86::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1592,7 +1667,7 @@ void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { } -void LocationsBuilderX86::VisitCondition(HCondition* cond) { +void LocationsBuilderX86::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. 
@@ -1625,7 +1700,7 @@ void LocationsBuilderX86::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1686,83 +1761,83 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { } void LocationsBuilderX86::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) { - VisitCondition(comp); + 
HandleCondition(comp); } void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { @@ -1820,7 +1895,7 @@ void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { @@ -1917,8 +1992,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), - Location::RequiresRegister()); + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } if (codegen_->IsBaseline()) { @@ -1958,6 +2032,11 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderX86 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } @@ -2460,6 +2539,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2468,6 +2548,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -3298,11 +3379,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; } @@ -3740,6 +3823,92 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register __ Bind(&done); } +void LocationsBuilderX86::VisitRor(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + + switch (ror->GetResultType()) { + case Primitive::kPrimLong: + // Add the temporary needed. 
+ locations->AddTemp(Location::RequiresRegister()); + FALLTHROUGH_INTENDED; + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL (unless it is a constant). + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1))); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86::VisitRor(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + if (ror->GetResultType() == Primitive::kPrimInt) { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + __ rorl(first_reg, second_reg); + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + __ rorl(first_reg, imm); + } + return; + } + + DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong); + Register first_reg_lo = first.AsRegisterPairLow<Register>(); + Register first_reg_hi = first.AsRegisterPairHigh<Register>(); + Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(second_reg, ECX); + __ movl(temp_reg, first_reg_hi); + __ shrd(first_reg_hi, first_reg_lo, second_reg); + __ shrd(first_reg_lo, temp_reg, second_reg); + __ movl(temp_reg, first_reg_hi); + __ testl(second_reg, Immediate(32)); + __ cmovl(kNotEqual, first_reg_hi, first_reg_lo); + __ cmovl(kNotEqual, first_reg_lo, temp_reg); + } else { + int32_t shift_amt = + CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue; + if (shift_amt == 0) { + // Already fine. + return; + } + if (shift_amt == 32) { + // Just swap. 
+ __ movl(temp_reg, first_reg_lo); + __ movl(first_reg_lo, first_reg_hi); + __ movl(first_reg_hi, temp_reg); + return; + } + + Immediate imm(shift_amt); + // Save the contents of the low value. + __ movl(temp_reg, first_reg_lo); + + // Shift right into low, feeding bits from high. + __ shrd(first_reg_lo, first_reg_hi, imm); + + // Shift right into high, feeding bits from the original low. + __ shrd(first_reg_hi, temp_reg, imm); + + // Swap if needed. + if (shift_amt > 32) { + __ movl(temp_reg, first_reg_lo); + __ movl(first_reg_lo, first_reg_hi); + __ movl(first_reg_hi, temp_reg); + } + } +} + void LocationsBuilderX86::VisitShl(HShl* shl) { HandleShift(shl); } @@ -3780,6 +3949,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3796,13 +3966,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - // Note: if heap poisoning is enabled, the entry point takes care // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3986,7 +4156,7 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { +void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { /* * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
* All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. @@ -3994,7 +4164,7 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -4134,12 +4304,16 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp Register temp = temp_in.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - DCHECK(receiver.IsRegister()); // /* HeapReference<Class> */ temp = receiver->klass_ - __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); + __ movl(temp, Address(receiver, class_offset)); MaybeRecordImplicitNullCheck(invoke); // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. @@ -4236,9 +4410,14 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { // Long values can be loaded atomically into an XMM using movsd. - // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM - // and then copy the XMM into the output 32bits at a time). 
+ // So we use an XMM register as a temp to achieve atomicity (first + // load the temp into the XMM and then copy the XMM into the + // output, 32 bits at a time). locations->AddTemp(Location::RequiresFpuRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); } } @@ -4276,9 +4455,32 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } case Primitive::kPrimInt: - case Primitive::kPrimNot: { __ movl(out.AsRegister<Register>(), Address(base, offset)); break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ movl(out.AsRegister<Register>(), Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; } case Primitive::kPrimLong: { @@ -4313,17 +4515,20 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - // Longs are handled in the switch. 
- if (field_type != Primitive::kPrimLong) { + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) { + // Potential implicit null checks, in the case of reference or + // long fields, are handled in the previous switch statement. + } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. + } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4388,7 +4593,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } bool maybe_record_implicit_null_check_done = false; @@ -4493,7 +4698,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4674,6 +4879,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. 
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -4681,12 +4891,13 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); + Location out_loc = locations->Out(); Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4698,7 +4909,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4710,7 +4921,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4722,7 +4933,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = 
locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4732,13 +4943,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { __ movl(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); @@ -4748,20 +4955,56 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. 
+ codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + uint32_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Location out = locations->Out(); - DCHECK_NE(obj, out.AsRegisterPairLow<Register>()); + DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset)); + __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); + __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); } else { - __ movl(out.AsRegisterPairLow<Register>(), + __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, 
index.AsRegister<Register>(), TIMES_8, data_offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(out.AsRegisterPairHigh<Register>(), + __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize)); } break; @@ -4769,7 +5012,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movss(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); @@ -4781,7 +5024,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movsd(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); @@ -4796,23 +5039,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { UNREACHABLE(); } - if (type != Primitive::kPrimLong) { + if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) { + // Potential implicit null checks, in the case of reference or + // long arrays, are handled in the previous switch statement. 
+ } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } - - if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } - } } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { @@ -4854,7 +5086,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); } } @@ -4944,12 +5176,12 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { // __ movl(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ movl(temp, Address(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = register_value->klass_ // __ movl(temp2, Address(register_value, class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); // // __ cmpl(temp, temp2); @@ -5230,8 +5462,8 @@ void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instructio DCHECK_EQ(slow_path->GetSuccessor(), successor); } - __ fs()->cmpw(Address::Absolute( - Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); + __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), + Immediate(0)); if (successor == nullptr) { __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetReturnLabel()); @@ -5501,6 +5733,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5511,43 +5744,33 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leal(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } 
else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movl(out, Address(current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value())); + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ leal(out, Address(out, cache_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ movl(out, Address(out, cache_offset)); - } + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + 
GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5580,49 +5803,36 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( } void LocationsBuilderX86::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leal(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); + // /* 
GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ leal(out, Address(out, cache_offset)); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ movl(out, Address(out, cache_offset)); + if (!load->IsInDexCache()) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } - - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { @@ -5659,6 +5869,15 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -5686,21 +5905,22 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for // some cases. 
- if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5716,10 +5936,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ movl(out, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<Register>()); @@ -5740,17 +5959,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. NearLabel loop; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5779,17 +5989,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } __ j(kEqual, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5813,17 +6014,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } __ j(kEqual, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ movl(out, Address(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5867,6 +6059,13 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, /* is_fatal */ false); @@ -5919,27 +6118,27 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. 
- if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5959,8 +6158,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5982,18 +6180,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. 
NearLabel loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -6006,8 +6194,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -6033,18 +6220,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } __ j(kEqual, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp2 = temp2_loc.AsRegister<Register>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -6056,8 +6233,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6074,19 +6250,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ movl(temp, Address(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. 
the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -6100,8 +6265,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -6109,8 +6273,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6127,6 +6290,13 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
__ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -6148,6 +6318,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -6283,14 +6458,226 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ movl(temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + 
static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ leal(root_reg, Address(obj, offset)); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + } +} + +void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Address src(obj, offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + Address src = index.IsConstant() ? 
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : + Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + const Address& src, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. 
+ + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ movl(temp_reg, Address(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); + __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86 memory model. + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // The actual reference load. + // /* HeapReference<Object> */ ref = *src + __ movl(ref_reg, src); + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} +void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. 
+ // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -6304,57 +6691,41 @@ void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. 
- GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<Register>()); } } -void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6388,31 +6759,67 @@ void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); +void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. 
+ DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. + for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); } // And the default for any other value. 
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + switch_instr->GetDefaultBlock()); +} + void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); @@ -6427,11 +6834,20 @@ void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + if (num_entries <= kPackedSwitchJumpTableThreshold) { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + default_block); + return; + } + // Optimizing has a jump area. Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); Register constant_area = locations->InAt(1).AsRegister<Register>(); @@ -6443,7 +6859,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_ } // Is the value in range? 
- DCHECK_GE(num_entries, 1); + DCHECK_GE(num_entries, 1u); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); @@ -6668,7 +7084,7 @@ Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, // TODO: target as memory. void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) { if (!target.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 064051c7f4..3d343177d0 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -166,6 +167,7 @@ class LocationsBuilderX86 : public HGraphVisitor { private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -195,6 +197,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { X86Assembler* GetAssembler() const { return assembler_; } + // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump + // table version generates 7 instructions and num_entries literals. Compare/jump sequence will + // generates less code/data with a small num_entries. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; + private: // Generate code for the given suspend check. 
If not null, `successor` // is the block to branch to if the suspend check is not needed, and after @@ -207,6 +214,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void DivByPowerOfTwo(HDiv* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); @@ -214,11 +222,44 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); - void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). 
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); + // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. void PushOntoFPStack(Location source, uint32_t temp_offset, @@ -236,6 +277,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -354,6 +400,8 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_, block); } @@ -395,7 +443,26 @@ class CodeGeneratorX86 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. 
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -412,23 +479,25 @@ class CodeGeneratorX86 : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. 
// // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -438,9 +507,31 @@ class CodeGeneratorX86 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. + // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores. + void MemoryFence(bool non_temporal = false) { + if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(ESP, 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + const Address& src, + Location temp, + bool needs_null_check); + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); struct PcRelativeDexCacheAccessInfo { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 0e0b8698e9..ffd8c42e20 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -41,6 +41,10 @@ namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = RDI; +// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump +// table version generates 7 instructions and num_entries literals. 
Compare/jump sequence will +// generates less code/data with a small num_entries. +static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; @@ -65,6 +69,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -91,6 +96,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -149,6 +155,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -203,6 +210,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -240,6 +248,11 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } Location out = locations->Out(); // Move the class to the desired location. 
@@ -290,6 +303,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -386,6 +400,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { deoptimize, deoptimize->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -428,6 +443,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -440,6 +456,56 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; +// Slow path marking an object during a read barrier. 
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); + + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: @@ -461,7 +527,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { // reference load to be instrumented, e.g.: // // __ movl(out, Address(out, offset)); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -477,7 +543,9 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -618,13 +686,17 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); 
SaveLiveRegisters(codegen, locations); @@ -715,7 +787,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ gs()->movl(temp.AsRegister<CpuRegister>(), - Address::Absolute(invoke->GetStringInitOffset(), true)); + Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); @@ -732,7 +804,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, invoke->GetDexCacheArrayOffset()); __ movq(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, false /* no_rip */)); + Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind the label at the end of the "movl" insn. __ Bind(&pc_relative_dex_cache_patches_.back().label); break; @@ -790,12 +862,17 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t CpuRegister temp = temp_in.AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. 
+ InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); + size_t class_offset = mirror::Object::ClassOffset().SizeValue(); - DCHECK(receiver.IsRegister()); // /* HeapReference<Class> */ temp = receiver->klass_ - __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); + __ movl(temp, Address(CpuRegister(receiver), class_offset)); MaybeRecordImplicitNullCheck(invoke); // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. @@ -886,7 +963,7 @@ void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(instruction, slow_path); - __ gs()->call(Address::Absolute(entry_point_offset, true)); + __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1523,6 +1600,14 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderX86_64::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1566,7 +1651,7 @@ void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { } -void LocationsBuilderX86_64::VisitCondition(HCondition* cond) { +void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. 
@@ -1590,7 +1675,7 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1688,83 +1773,83 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { } void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void 
InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { @@ -1918,7 +2003,7 @@ void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) } void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { @@ -2646,7 +2731,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } else { DCHECK(in.GetConstant()->IsIntConstant()); __ movl(out.AsRegister<CpuRegister>(), - Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue()))); + Immediate(static_cast<uint16_t>( + in.GetConstant()->AsIntConstant()->GetValue()))); } break; @@ -2890,7 +2976,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ addss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + 
codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ addss(first.AsFpuRegister<XmmRegister>(), @@ -2904,7 +2991,8 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ addsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ addsd(first.AsFpuRegister<XmmRegister>(), @@ -2979,7 +3067,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ subss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ subss(first.AsFpuRegister<XmmRegister>(), @@ -2993,7 +3082,8 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ subsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ subsd(first.AsFpuRegister<XmmRegister>(), @@ -3100,7 +3190,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ mulss(first.AsFpuRegister<XmmRegister>(), - 
codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ mulss(first.AsFpuRegister<XmmRegister>(), @@ -3115,7 +3206,8 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ mulsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ mulsd(first.AsFpuRegister<XmmRegister>(), @@ -3521,7 +3613,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ divss(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + codegen_->LiteralFloatAddress( + second.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(second.IsStackSlot()); __ divss(first.AsFpuRegister<XmmRegister>(), @@ -3535,7 +3628,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { __ divsd(first.AsFpuRegister<XmmRegister>(), - codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + codegen_->LiteralDoubleAddress( + second.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(second.IsDoubleStackSlot()); __ divsd(first.AsFpuRegister<XmmRegister>(), @@ -3734,6 +3828,56 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { } default: LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + UNREACHABLE(); + } +} + +void 
LocationsBuilderX86_64::VisitRor(HRor* ror) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + + switch (ror->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL (unless it is a constant). + locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1))); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); + CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); + Location second = locations->InAt(1); + + switch (ror->GetResultType()) { + case Primitive::kPrimInt: + if (second.IsRegister()) { + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + __ rorl(first_reg, second_reg); + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + __ rorl(first_reg, imm); + } + break; + case Primitive::kPrimLong: + if (second.IsRegister()) { + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + __ rorq(first_reg, second_reg); + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue); + __ rorq(first_reg, imm); + } + break; + default: + LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); + UNREACHABLE(); } } @@ -3777,6 +3921,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3795,13 +3940,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; 
codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3888,15 +4033,15 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED LOG(FATAL) << "Unimplemented"; } -void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { +void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { /* * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence. - * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. + * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model. * For those cases, all we need to ensure is that there is a scheduling barrier in place. */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -3931,6 +4076,11 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { Location::RequiresRegister(), object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, @@ -3966,12 +4116,36 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; + } + case Primitive::kPrimLong: { __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; @@ -3992,14 +4166,20 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); - - if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + if (field_type == Primitive::kPrimNot) { + // Potential implicit null checks, in the case of reference + // fields, are handled in the previous switch statement.
+ } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (is_volatile) { + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. + } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4053,7 +4233,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } bool maybe_record_implicit_null_check_done = false; @@ -4159,7 +4339,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4336,6 +4516,11 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { @@ -4343,12 +4528,13 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4360,7 +4546,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); @@ -4372,7 +4558,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4384,7 +4570,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: 
{ uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); @@ -4394,13 +4580,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movl(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); @@ -4410,9 +4592,46 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + CpuRegister out = out_loc.AsRegister<CpuRegister>(); + if (index.IsConstant()) { + uint32_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ movl(out, Address(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movq(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); @@ -4424,7 +4643,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movss(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); @@ -4436,7 +4655,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movsd(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); @@ -4450,20 +4669,12 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - 
uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4496,8 +4707,6 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { // This first temporary register is possibly used for heap // reference poisoning and/or read barrier emission too. locations->AddTemp(Location::RequiresRegister()); - // This second temporary register is possibly used for read - // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } @@ -4589,12 +4798,12 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { // __ movl(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ movl(temp, Address(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = register_value->klass_ // __ movl(temp2, Address(register_value, class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc); // // __ cmpl(temp, temp2); @@ -4820,8 +5029,8 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, __ testl(value, value); __ j(kEqual, &is_null); } - __ gs()->movq(card, Address::Absolute( - Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true)); + __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true)); __ movq(temp, object); __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); __ 
movb(Address(temp, card, TIMES_1, 0), card); @@ -4880,8 +5089,9 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc DCHECK_EQ(slow_path->GetSuccessor(), successor); } - __ gs()->cmpw(Address::Absolute( - Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); + __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true), + Immediate(0)); if (successor == nullptr) { __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetReturnLabel()); @@ -5105,7 +5315,7 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( Immediate(mirror::Class::kStatusInitialized)); __ j(kLess, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - // No need for memory fence, thanks to the X86_64 memory model. + // No need for memory fence, thanks to the x86-64 memory model. } void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { @@ -5125,6 +5335,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5135,43 +5346,31 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leaq(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + 
GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movq(out, Address(current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value())); + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ leaq(out, Address(out, cache_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ movl(out, Address(out, cache_offset)); - } - - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5195,53 +5394,41 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { } void 
LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); - codegen_->AddSlowPath(slow_path); - LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ leaq(out, Address(current_method, declaring_class_offset)); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ movl(out, Address(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - size_t cache_offset = 
CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ leaq(out, Address(out, cache_offset)); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ movl(out, Address(out, cache_offset)); + if (!load->IsInDexCache()) { + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } - - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); } static Address GetExceptionTlsAddress() { - return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true); + return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true); } void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { @@ -5274,6 +5461,15 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5301,21 +5497,22 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for // some cases. 
- if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5331,10 +5528,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ movl(out, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); @@ -5360,17 +5556,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. NearLabel loop, success; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5399,17 +5586,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } __ j(kEqual, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ movl(out, Address(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5433,17 +5611,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } __ j(kEqual, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? 
locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - __ movl(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ movl(out, Address(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5487,6 +5656,13 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, /* is_fatal */ false); @@ -5539,27 +5715,27 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. 
- if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5571,7 +5747,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); - NearLabel done; + Label done; // Avoid null check if we know obj is not null. 
if (instruction->MustDoNullCheck()) { __ testl(obj, obj); @@ -5579,8 +5755,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5602,18 +5777,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. NearLabel loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5626,8 +5791,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5653,18 +5817,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } __ j(kEqual, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ movl(temp, Address(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5676,8 +5830,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5694,19 +5847,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Otherwise, we need to check that the object's class is a non-primitive array. 
- Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>(); - __ movl(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ movl(temp, Address(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5720,8 +5862,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5729,8 +5870,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kEqual, &done); // Same comment as above regarding `temp` and the slow path. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ movl(temp, Address(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5747,6 +5887,13 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
__ jmp(type_check_slow_path->GetEntryLabel()); break; } @@ -5768,6 +5915,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } @@ -5885,14 +6037,227 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } -void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + CpuRegister out_reg = out.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ movl(temp.AsRegister<CpuRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(out + offset) + __ movl(out_reg, Address(out_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + CpuRegister out_reg = out.AsRegister<CpuRegister>(); + CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + __ movl(out_reg, Address(obj_reg, offset)); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + CpuRegister obj, + uint32_t offset) { + CpuRegister root_reg = root.AsRegister<CpuRegister>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(), + /* no_rip */ true), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ leaq(root_reg, Address(obj, offset)); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. 
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ movl(root_reg, Address(obj, offset)); + } +} + +void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Address src(obj, offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + Address src = index.IsConstant() ? + Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) : + Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset); + GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check); +} + +void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. 
+ // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); + CpuRegister temp_reg = temp.AsRegister<CpuRegister>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ movl(temp_reg, Address(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); + __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Load fence to prevent load-load reordering. + // Note that this is a no-op, thanks to the x86-64 memory model. + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + // The actual reference load. 
+ // /* HeapReference<Object> */ ref = *src + __ movl(ref_reg, src); + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -5906,57 +6271,41 @@ void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. 
- * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); } } -void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. 
SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -5994,11 +6343,58 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Should we generate smaller inline compare/jumps? + if (num_entries <= kPackedSwitchJumpTableThreshold) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg_in, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); + + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. 
+ for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg_in, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg_in, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } + return; + } // Remove the bias, if needed. Register value_reg_out = value_reg_in.AsRegister(); @@ -6009,7 +6405,6 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins CpuRegister value_reg(value_reg_out); // Is the value in range? - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); @@ -6159,7 +6554,7 @@ Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { // TODO: trg as memory. 
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 145b1f33b4..9995416138 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -47,6 +48,12 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +// These XMM registers are non-volatile in ART ABI, but volatile in native ABI. +// If the ART ABI changes, this list must be updated. It is used to ensure that +// these are not clobbered by any direct call to native code (such as math intrinsics). 
+static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; + + class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeRuntimeCallingConvention() @@ -165,6 +172,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction); @@ -206,12 +214,46 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void DivByPowerOfTwo(HDiv* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* operation); - void GenerateMemoryBarrier(MemBarrierKind kind); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. 
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + CpuRegister obj, + uint32_t offset); + void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, @@ -318,6 +360,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + // Helper method to move a value between two locations. void Move(Location destination, Location source); @@ -350,7 +394,26 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + CpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + CpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. 
// // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -367,23 +430,25 @@ class CodeGeneratorX86_64 : public CodeGenerator { // When `index` provided (i.e., when it is different from // Location::NoLocation()), the offset value passed to // artReadBarrierSlow is adjusted to take `index` into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -393,7 +458,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. 
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); int ConstantAreaStart() const { return constant_area_start_; @@ -417,7 +482,28 @@ class CodeGeneratorX86_64 : public CodeGenerator { int64_t v, HInstruction* instruction); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. The locked add should not be used for + // ordering non-temporal stores. + void MemoryFence(bool force_mfence = false) { + if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check); + struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) : target_dex_file(dex_file), element_offset(element_off), label() { } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 57de41f557..d970704368 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -35,6 +35,7 @@ #include "code_generator_mips64.h" #include "code_generator_x86.h" #include "code_generator_x86_64.h" +#include "code_simulator_container.h" #include "common_compiler_test.h" #include "dex_file.h" #include "dex_instruction.h" @@ -124,26 +125,85 @@ class InternalCodeAllocator : public CodeAllocator { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; +static bool CanExecuteOnHardware(InstructionSet 
target_isa) { + return (target_isa == kRuntimeISA) + // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). + || (kRuntimeISA == kArm && target_isa == kThumb2); +} + +static bool CanExecute(InstructionSet target_isa) { + CodeSimulatorContainer simulator(target_isa); + return CanExecuteOnHardware(target_isa) || simulator.CanSimulate(); +} + +template <typename Expected> +static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)()); + +template <> +bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) { + simulator->RunFrom(reinterpret_cast<intptr_t>(f)); + return simulator->GetCReturnBool(); +} + +template <> +int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) { + simulator->RunFrom(reinterpret_cast<intptr_t>(f)); + return simulator->GetCReturnInt32(); +} + +template <> +int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) { + simulator->RunFrom(reinterpret_cast<intptr_t>(f)); + return simulator->GetCReturnInt64(); +} + +template <typename Expected> +static void VerifyGeneratedCode(InstructionSet target_isa, + Expected (*f)(), + bool has_result, + Expected expected) { + ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable."; + + // Verify on simulator. + CodeSimulatorContainer simulator(target_isa); + if (simulator.CanSimulate()) { + Expected result = SimulatorExecute<Expected>(simulator.Get(), f); + if (has_result) { + ASSERT_EQ(expected, result); + } + } + + // Verify on hardware. 
+ if (CanExecuteOnHardware(target_isa)) { + Expected result = f(); + if (has_result) { + ASSERT_EQ(expected, result); + } + } +} + template <typename Expected> static void Run(const InternalCodeAllocator& allocator, const CodeGenerator& codegen, bool has_result, Expected expected) { + InstructionSet target_isa = codegen.GetInstructionSet(); + typedef Expected (*fptr)(); CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); - if (codegen.GetInstructionSet() == kThumb2) { + if (target_isa == kThumb2) { // For thumb we need the bottom bit set. f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); } - Expected result = f(); - if (has_result) { - ASSERT_EQ(expected, result); - } + VerifyGeneratedCode(target_isa, f, has_result, expected); } template <typename Expected> -static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { +static void RunCodeBaseline(InstructionSet target_isa, + HGraph* graph, + bool has_result, + Expected expected) { InternalCodeAllocator allocator; CompilerOptions compiler_options; @@ -153,7 +213,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. 
codegenX86.CompileBaseline(&allocator, true); - if (kRuntimeISA == kX86) { + if (target_isa == kX86) { Run(allocator, codegenX86, has_result, expected); } @@ -161,7 +221,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); codegenARM.CompileBaseline(&allocator, true); - if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { + if (target_isa == kArm || target_isa == kThumb2) { Run(allocator, codegenARM, has_result, expected); } @@ -169,7 +229,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); codegenX86_64.CompileBaseline(&allocator, true); - if (kRuntimeISA == kX86_64) { + if (target_isa == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); } @@ -177,7 +237,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); codegenARM64.CompileBaseline(&allocator, true); - if (kRuntimeISA == kArm64) { + if (target_isa == kArm64) { Run(allocator, codegenARM64, has_result, expected); } @@ -193,7 +253,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); codegenMIPS64.CompileBaseline(&allocator, true); - if (kRuntimeISA == kMips64) { + if (target_isa == kMips64) { Run(allocator, codegenMIPS64, has_result, expected); } } @@ -221,37 +281,38 @@ static void RunCodeOptimized(CodeGenerator* codegen, } template <typename Expected> -static void RunCodeOptimized(HGraph* graph, +static void 
RunCodeOptimized(InstructionSet target_isa, + HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { CompilerOptions compiler_options; - if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - TestCodeGeneratorARM codegenARM(graph, - *ArmInstructionSetFeatures::FromCppDefines(), - compiler_options); + if (target_isa == kArm || target_isa == kThumb2) { + std::unique_ptr<const ArmInstructionSetFeatures> features_arm( + ArmInstructionSetFeatures::FromCppDefines()); + TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kArm64) { - arm64::CodeGeneratorARM64 codegenARM64(graph, - *Arm64InstructionSetFeatures::FromCppDefines(), - compiler_options); + } else if (target_isa == kArm64) { + std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( + Arm64InstructionSetFeatures::FromCppDefines()); + arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kX86) { + } else if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kX86_64) { + } else if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kMips) { + } else if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> 
features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected); - } else if (kRuntimeISA == kMips64) { + } else if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); @@ -259,7 +320,10 @@ static void RunCodeOptimized(HGraph* graph, } } -static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { +static void TestCode(InstructionSet target_isa, + const uint16_t* data, + bool has_result = false, + int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateGraph(&arena); @@ -269,10 +333,13 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe ASSERT_TRUE(graph_built); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); - RunCodeBaseline(graph, has_result, expected); + RunCodeBaseline(target_isa, graph, has_result, expected); } -static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { +static void TestCodeLong(InstructionSet target_isa, + const uint16_t* data, + bool has_result, + int64_t expected) { ArenaPool pool; ArenaAllocator arena(&pool); HGraph* graph = CreateGraph(&arena); @@ -282,108 +349,110 @@ static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected ASSERT_TRUE(graph_built); // Remove suspend checks, they cannot be executed in this context. 
RemoveSuspendChecks(graph); - RunCodeBaseline(graph, has_result, expected); + RunCodeBaseline(target_isa, graph, has_result, expected); } -TEST(CodegenTest, ReturnVoid) { +class CodegenTest: public ::testing::TestWithParam<InstructionSet> {}; + +TEST_P(CodegenTest, ReturnVoid) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, CFG1) { +TEST_P(CodegenTest, CFG1) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, CFG2) { +TEST_P(CodegenTest, CFG2) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, CFG3) { +TEST_P(CodegenTest, CFG3) { const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); - TestCode(data1); + TestCode(GetParam(), data1); const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); - TestCode(data2); + TestCode(GetParam(), data2); const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); - TestCode(data3); + TestCode(GetParam(), data3); } -TEST(CodegenTest, CFG4) { +TEST_P(CodegenTest, CFG4) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, CFG5) { +TEST_P(CodegenTest, CFG5) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, IntConstant) { 
+TEST_P(CodegenTest, IntConstant) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); - TestCode(data); + TestCode(GetParam(), data); } -TEST(CodegenTest, Return1) { +TEST_P(CodegenTest, Return1) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); - TestCode(data, true, 0); + TestCode(GetParam(), data, true, 0); } -TEST(CodegenTest, Return2) { +TEST_P(CodegenTest, Return2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); - TestCode(data, true, 0); + TestCode(GetParam(), data, true, 0); } -TEST(CodegenTest, Return3) { +TEST_P(CodegenTest, Return3) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); - TestCode(data, true, 1); + TestCode(GetParam(), data, true, 1); } -TEST(CodegenTest, ReturnIf1) { +TEST_P(CodegenTest, ReturnIf1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -391,10 +460,10 @@ TEST(CodegenTest, ReturnIf1) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(data, true, 1); + TestCode(GetParam(), data, true, 1); } -TEST(CodegenTest, ReturnIf2) { +TEST_P(CodegenTest, ReturnIf2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -402,12 +471,12 @@ TEST(CodegenTest, ReturnIf2) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(data, true, 0); + TestCode(GetParam(), data, true, 0); } // Exercise bit-wise (one's complement) not-int instruction. 
#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST(CodegenTest, TEST_NAME) { \ +TEST_P(CodegenTest, TEST_NAME) { \ const int32_t input = INPUT; \ const uint16_t input_lo = Low16Bits(input); \ const uint16_t input_hi = High16Bits(input); \ @@ -416,7 +485,7 @@ TEST(CodegenTest, TEST_NAME) { \ Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ Instruction::RETURN | 1 << 8); \ \ - TestCode(data, true, EXPECTED_OUTPUT); \ + TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -432,7 +501,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31) // Exercise bit-wise (one's complement) not-long instruction. #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST(CodegenTest, TEST_NAME) { \ +TEST_P(CodegenTest, TEST_NAME) { \ const int64_t input = INPUT; \ const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ @@ -443,7 +512,7 @@ TEST(CodegenTest, TEST_NAME) { \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ \ - TestCodeLong(data, true, EXPECTED_OUTPUT); \ + TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \ } NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1)) @@ -482,7 +551,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX, #undef NOT_LONG_TEST -TEST(CodegenTest, IntToLongOfLongToInt) { +TEST_P(CodegenTest, IntToLongOfLongToInt) { const int64_t input = INT64_C(4294967296); // 2^32 const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. 
const uint16_t word1 = High16Bits(Low32Bits(input)); @@ -496,48 +565,48 @@ TEST(CodegenTest, IntToLongOfLongToInt) { Instruction::INT_TO_LONG | 2 << 8 | 4 << 12, Instruction::RETURN_WIDE | 2 << 8); - TestCodeLong(data, true, 1); + TestCodeLong(GetParam(), data, true, 1); } -TEST(CodegenTest, ReturnAdd1) { +TEST_P(CodegenTest, ReturnAdd1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, Instruction::RETURN); - TestCode(data, true, 7); + TestCode(GetParam(), data, true, 7); } -TEST(CodegenTest, ReturnAdd2) { +TEST_P(CodegenTest, ReturnAdd2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(data, true, 7); + TestCode(GetParam(), data, true, 7); } -TEST(CodegenTest, ReturnAdd3) { +TEST_P(CodegenTest, ReturnAdd3) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(data, true, 7); + TestCode(GetParam(), data, true, 7); } -TEST(CodegenTest, ReturnAdd4) { +TEST_P(CodegenTest, ReturnAdd4) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); - TestCode(data, true, 7); + TestCode(GetParam(), data, true, 7); } -TEST(CodegenTest, NonMaterializedCondition) { +TEST_P(CodegenTest, NonMaterializedCondition) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -583,30 +652,30 @@ TEST(CodegenTest, NonMaterializedCondition) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(graph, hook_before_codegen, true, 0); + RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0); } -TEST(CodegenTest, ReturnMulInt) { +TEST_P(CodegenTest, ReturnMulInt) { const uint16_t data[] = 
TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, Instruction::RETURN); - TestCode(data, true, 12); + TestCode(GetParam(), data, true, 12); } -TEST(CodegenTest, ReturnMulInt2addr) { +TEST_P(CodegenTest, ReturnMulInt2addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(data, true, 12); + TestCode(GetParam(), data, true, 12); } -TEST(CodegenTest, ReturnMulLong) { +TEST_P(CodegenTest, ReturnMulLong) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 0 << 12 | 1 << 8, @@ -615,10 +684,10 @@ TEST(CodegenTest, ReturnMulLong) { Instruction::MUL_LONG, 2 << 8 | 0, Instruction::RETURN_WIDE); - TestCodeLong(data, true, 12); + TestCodeLong(GetParam(), data, true, 12); } -TEST(CodegenTest, ReturnMulLong2addr) { +TEST_P(CodegenTest, ReturnMulLong2addr) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0 << 8, Instruction::CONST_4 | 0 << 12 | 1 << 8, @@ -627,28 +696,28 @@ TEST(CodegenTest, ReturnMulLong2addr) { Instruction::MUL_LONG_2ADDR | 2 << 12, Instruction::RETURN_WIDE); - TestCodeLong(data, true, 12); + TestCodeLong(GetParam(), data, true, 12); } -TEST(CodegenTest, ReturnMulIntLit8) { +TEST_P(CodegenTest, ReturnMulIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(data, true, 12); + TestCode(GetParam(), data, true, 12); } -TEST(CodegenTest, ReturnMulIntLit16) { +TEST_P(CodegenTest, ReturnMulIntLit16) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); - TestCode(data, true, 12); + TestCode(GetParam(), data, true, 12); } -TEST(CodegenTest, 
MaterializedCondition1) { +TEST_P(CodegenTest, MaterializedCondition1) { // Check that condition are materialized correctly. A materialized condition // should yield `1` if it evaluated to true, and `0` otherwise. // We force the materialization of comparisons for different combinations of @@ -689,11 +758,11 @@ TEST(CodegenTest, MaterializedCondition1) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } -TEST(CodegenTest, MaterializedCondition2) { +TEST_P(CodegenTest, MaterializedCondition2) { // Check that HIf correctly interprets a materialized condition. // We force the materialization of comparisons for different combinations of // inputs. An HIf takes the materialized combination as input and returns a @@ -755,31 +824,35 @@ TEST(CodegenTest, MaterializedCondition2) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } -TEST(CodegenTest, ReturnDivIntLit8) { +TEST_P(CodegenTest, ReturnDivIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(data, true, 1); + TestCode(GetParam(), data, true, 1); } -TEST(CodegenTest, ReturnDivInt2Addr) { +TEST_P(CodegenTest, ReturnDivInt2Addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(data, true, 2); + TestCode(GetParam(), data, true, 2); } // Helper method. 
-static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type) { +static void TestComparison(IfCondition condition, + int64_t i, + int64_t j, + Primitive::Type type, + const InstructionSet target_isa) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = CreateGraph(&allocator); @@ -862,46 +935,78 @@ static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitiv auto hook_before_codegen = [](HGraph*) { }; - RunCodeOptimized(graph, hook_before_codegen, true, expected_result); + RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result); } -TEST(CodegenTest, ComparisonsInt) { +TEST_P(CodegenTest, ComparisonsInt) { + const InstructionSet target_isa = GetParam(); for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimInt); - TestComparison(kCondNE, i, j, Primitive::kPrimInt); - TestComparison(kCondLT, i, j, Primitive::kPrimInt); - TestComparison(kCondLE, i, j, Primitive::kPrimInt); - TestComparison(kCondGT, i, j, Primitive::kPrimInt); - TestComparison(kCondGE, i, j, Primitive::kPrimInt); - TestComparison(kCondB, i, j, Primitive::kPrimInt); - TestComparison(kCondBE, i, j, Primitive::kPrimInt); - TestComparison(kCondA, i, j, Primitive::kPrimInt); - TestComparison(kCondAE, i, j, Primitive::kPrimInt); + TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondAE, i, j, 
Primitive::kPrimInt, target_isa); } } } -TEST(CodegenTest, ComparisonsLong) { +TEST_P(CodegenTest, ComparisonsLong) { // TODO: make MIPS work for long if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { return; } + const InstructionSet target_isa = GetParam(); + if (target_isa == kMips || target_isa == kMips64) { + return; + } + for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimLong); - TestComparison(kCondNE, i, j, Primitive::kPrimLong); - TestComparison(kCondLT, i, j, Primitive::kPrimLong); - TestComparison(kCondLE, i, j, Primitive::kPrimLong); - TestComparison(kCondGT, i, j, Primitive::kPrimLong); - TestComparison(kCondGE, i, j, Primitive::kPrimLong); - TestComparison(kCondB, i, j, Primitive::kPrimLong); - TestComparison(kCondBE, i, j, Primitive::kPrimLong); - TestComparison(kCondA, i, j, Primitive::kPrimLong); - TestComparison(kCondAE, i, j, Primitive::kPrimLong); + TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); } } } +static ::std::vector<InstructionSet> GetTargetISAs() { + ::std::vector<InstructionSet> v; + // Add all ISAs that are executable on hardware or on simulator. 
+ const ::std::vector<InstructionSet> executable_isa_candidates = { + kArm, + kArm64, + kThumb2, + kX86, + kX86_64, + kMips, + kMips64 + }; + + for (auto target_isa : executable_isa_candidates) { + if (CanExecute(target_isa)) { + v.push_back(target_isa); + } + } + + return v; +} + +INSTANTIATE_TEST_CASE_P(MultipleTargets, + CodegenTest, + ::testing::ValuesIn(GetTargetISAs())); + } // namespace art diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e1a8c9cc0f..10d83439fd 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "code_generator.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -201,6 +202,11 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return true; } + // Our code generator ensures shift distances are within an encodable range. 
+ if (instr->IsRor()) { + return true; + } + int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) { @@ -255,6 +261,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } +static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kASR: return vixl::ASR; + case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL; + case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_SHIFT; + } +} + +static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB; + case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH; + case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW; + case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB; + case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH; + case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_EXTEND; + } +} + +static inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. 
+ return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +static inline bool HasShifterOperand(HInstruction* instr) { + // `neg` instructions are an alias of `sub` using the zero register as the + // first register input. + bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction)); + // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` + // does *not* support extension. This is because the `extended register` form + // of the `sub` instruction interprets the left register with code 31 as the + // stack pointer and not the zero register. (So does the `immediate` form.) In + // the other form `shifted register`, the register with code 31 is interpreted + // as the zero register. + return instruction->IsAdd() || instruction->IsSub(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 02e5dab3d4..67ff87a759 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -165,6 +165,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() + && !inst->IsNativeDebugInfo() // If we added an explicit barrier then we should keep it.
&& !inst->IsMemoryBarrier() && !inst->IsParameterValue() diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc new file mode 100644 index 0000000000..65820630f8 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_arm.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace arm { + +/** + * Finds instructions that need the dex cache arrays base as an input. + */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. 
+ HArmDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + // Ensure we only initialize the pointer once for each dex file. + auto lb = dex_cache_array_bases_.lower_bound(&dex_file); + if (lb != dex_cache_array_bases_.end() && + !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) { + return lb->second; + } + + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBasesIfNeeded().
+ HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + dex_cache_array_bases_.PutBefore(lb, &dex_file, base); + return base; + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h new file mode 100644 index 0000000000..015f910328 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace arm { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 5814d7556f..dfc363f9fd 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -735,26 +735,31 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } - // Test phi equivalents. There should not be two of the same type and they - // should only be created for constants which were untyped in DEX. - for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* other_phi = phi_it.Current()->AsPhi(); - if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { - if (phi->GetType() == other_phi->GetType()) { - std::stringstream type_str; - type_str << phi->GetType(); - AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", - phi->GetId(), - phi->GetRegNumber(), - type_str.str().c_str())); - } else { - ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); - if (!IsConstantEquivalent(phi, other_phi, &visited)) { - AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " - "are not equivalents of constants.", + // Test phi equivalents. There should not be two of the same type and they should only be + // created for constants which were untyped in DEX. Note that this test can be skipped for + // a synthetic phi (indicated by lack of a virtual register). 
+ if (phi->GetRegNumber() != kNoRegNumber) { + for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); + !phi_it.Done(); + phi_it.Advance()) { + HPhi* other_phi = phi_it.Current()->AsPhi(); + if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { + if (phi->GetType() == other_phi->GetType()) { + std::stringstream type_str; + type_str << phi->GetType(); + AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", phi->GetId(), - other_phi->GetId(), - phi->GetRegNumber())); + phi->GetRegNumber(), + type_str.str().c_str())); + } else { + ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); + if (!IsConstantEquivalent(phi, other_phi, &visited)) { + AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " + "are not equivalents of constants.", + phi->GetId(), + other_phi->GetId(), + phi->GetRegNumber())); + } } } } @@ -845,7 +850,7 @@ void SSAChecker::VisitCondition(HCondition* op) { void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); - if (op->IsUShr() || op->IsShr() || op->IsShl()) { + if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) { if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) { AddError(StringPrintf( "Shift operation %s %d has a non-int kind second input: " diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d166d0061f..e9fdb84d1e 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -393,15 +393,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { VisitInvoke(invoke); - StartAttributeStream("recursive") << std::boolalpha - << invoke->IsRecursive() - << std::noboolalpha; + StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind(); StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); if (invoke->IsStatic()) { 
StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement(); } } + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + VisitInvoke(invoke); + StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); + } + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { StartAttributeStream("field_type") << field_access->GetFieldType(); } @@ -422,6 +425,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + StartAttributeStream("shift") << instruction->GetShiftAmount(); + } + } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index de60cf21aa..78cb7d410a 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -28,7 +28,7 @@ namespace art { TEST(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -113,7 +113,7 @@ TEST(GVNTest, LocalFieldElimination) { TEST(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) 
HBasicBlock(graph); @@ -196,7 +196,7 @@ TEST(GVNTest, GlobalFieldElimination) { TEST(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -319,7 +319,7 @@ TEST(GVNTest, LoopFieldElimination) { TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC(); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index fdf8cc9c1f..19e6cbd314 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -71,10 +71,10 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph) } void HInductionVarAnalysis::Run() { - // Detects sequence variables (generalized induction variables) during an inner-loop-first - // traversal of all loops using Gerlek's algorithm. The order is only relevant if outer - // loops would use induction information of inner loops (not currently done). - for (HPostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { + // Detects sequence variables (generalized induction variables) during an outer to inner + // traversal of all loops using Gerlek's algorithm. The order is important to enable + // range analysis on outer loop while visiting inner loops. 
+ for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { HBasicBlock* graph_block = it_graph.Current(); if (graph_block->IsLoopHeader()) { VisitLoop(graph_block->GetLoopInformation()); @@ -705,7 +705,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::LookupInfo(HLoopInf return loop_it->second; } } - if (loop->IsLoopInvariant(instruction, true)) { + if (loop->IsDefinedOutOfTheLoop(instruction)) { + DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader())); InductionInfo* info = CreateInvariantFetch(instruction); AssignInfo(loop, instruction, info); return info; @@ -744,8 +745,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv if (value == 1) { return b; } else if (value == -1) { - op = kNeg; - a = nullptr; + return CreateSimplifiedInvariant(kNeg, nullptr, b); } } } @@ -762,41 +762,27 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv if (value == 1) { return a; } else if (value == -1) { - op = kNeg; - b = a; - a = nullptr; + return CreateSimplifiedInvariant(kNeg, nullptr, a); } } } else if (b->operation == kNeg) { // Simplify a + (-b) = a - b, a - (-b) = a + b, -(-b) = b. if (op == kAdd) { - op = kSub; - b = b->op_b; + return CreateSimplifiedInvariant(kSub, a, b->op_b); } else if (op == kSub) { - op = kAdd; - b = b->op_b; + return CreateSimplifiedInvariant(kAdd, a, b->op_b); } else if (op == kNeg) { return b->op_b; } + } else if (b->operation == kSub) { + // Simplify - (a - b) = b - a. + if (op == kNeg) { + return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a); + } } return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr); } -bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, - InductionInfo* info2) { - // Test structural equality only, without accounting for simplifications. 
- if (info1 != nullptr && info2 != nullptr) { - return - info1->induction_class == info2->induction_class && - info1->operation == info2->operation && - info1->fetch == info2->fetch && - InductionEqual(info1->op_a, info2->op_a) && - InductionEqual(info1->op_b, info2->op_b); - } - // Otherwise only two nullptrs are considered equal. - return info1 == info2; -} - bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { if (info != nullptr && info->induction_class == kInvariant) { // A direct constant fetch. @@ -811,19 +797,35 @@ bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { } } // Use range analysis to resolve compound values. - int32_t range_value; - if (InductionVarRange::GetConstant(info, &range_value)) { - *value = range_value; + InductionVarRange range(this); + int32_t min_val = 0; + int32_t max_val = 0; + if (range.IsConstantRange(info, &min_val, &max_val) && min_val == max_val) { + *value = min_val; return true; } } return false; } +bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, + InductionInfo* info2) { + // Test structural equality only, without accounting for simplifications. + if (info1 != nullptr && info2 != nullptr) { + return + info1->induction_class == info2->induction_class && + info1->operation == info2->operation && + info1->fetch == info2->fetch && + InductionEqual(info1->op_a, info2->op_a) && + InductionEqual(info1->op_b, info2->op_b); + } + // Otherwise only two nullptrs are considered equal. 
+ return info1 == info2; +} + std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { if (info != nullptr) { if (info->induction_class == kInvariant) { - int64_t value = -1; std::string inv = "("; inv += InductionToString(info->op_a); switch (info->operation) { @@ -839,8 +841,10 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { case kGE: inv += " >= "; break; case kFetch: DCHECK(info->fetch); - if (IsIntAndGet(info, &value)) { - inv += std::to_string(value); + if (info->fetch->IsIntConstant()) { + inv += std::to_string(info->fetch->AsIntConstant()->GetValue()); + } else if (info->fetch->IsLongConstant()) { + inv += std::to_string(info->fetch->AsLongConstant()->GetValue()); } else { inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName(); } diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index cf354093f2..84d5d82568 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -188,9 +188,11 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* CreateConstant(int64_t value, Primitive::Type type); InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b); + // Constants. + bool IsIntAndGet(InductionInfo* info, int64_t* value); + // Helpers. static bool InductionEqual(InductionInfo* info1, InductionInfo* info2); - static bool IsIntAndGet(InductionInfo* info, int64_t* value); static std::string InductionToString(InductionInfo* info); // TODO: fine tune the following data structures, only keep relevant data. 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index b40ef5aa41..ae15fcf381 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -119,6 +119,17 @@ void InductionVarRange::GetInductionRange(HInstruction* context, } } +bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const { + Value v1 = RefineOuter(*min_val, /* is_min */ true); + Value v2 = RefineOuter(*max_val, /* is_min */ false); + if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) { + *min_val = v1; + *max_val = v2; + return true; + } + return false; +} + bool InductionVarRange::CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* needs_finite_test, @@ -156,7 +167,7 @@ void InductionVarRange::GenerateTakenTest(HInstruction* context, // Private class methods. // -bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { +bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const { if (info != nullptr) { if (info->induction_class == HInductionVarAnalysis::kLinear) { return true; @@ -167,7 +178,7 @@ bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* inf return false; } -bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { +bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const { if (trip != nullptr) { if (trip->induction_class == HInductionVarAnalysis::kInvariant) { return trip->operation == HInductionVarAnalysis::kTripCountInBody || @@ -177,7 +188,7 @@ bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* tr return false; } -bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { +bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const { if (trip != nullptr) { if 
(trip->induction_class == HInductionVarAnalysis::kInvariant) { return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe || @@ -187,10 +198,57 @@ bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* return false; } +InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const { + // Detect common situation where an offset inside the trip count cancels out during range + // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET or finding + // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information + // with intermediate results that only incorporate single instructions. + if (trip != nullptr) { + HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a; + if (trip_expr->operation == HInductionVarAnalysis::kSub) { + int32_t min_value = 0; + int32_t stride_value = 0; + if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + if (!is_min && stride_value == 1) { + // Test original trip's negative operand (trip_expr->op_b) against + // the offset of the linear induction. + if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) { + // Analyze cancelled trip with just the positive operand (trip_expr->op_a). + HInductionVarAnalysis::InductionInfo cancelled_trip( + trip->induction_class, trip->operation, trip_expr->op_a, trip->op_b, nullptr); + return GetVal(&cancelled_trip, trip, in_body, is_min); + } + } else if (is_min && stride_value == -1) { + // Test original trip's positive operand (trip_expr->op_a) against + // the offset of the linear induction. + if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) { + // Analyze cancelled trip with just the negative operand (trip_expr->op_b). 
+ HInductionVarAnalysis::InductionInfo neg( + HInductionVarAnalysis::kInvariant, + HInductionVarAnalysis::kNeg, + nullptr, + trip_expr->op_b, + nullptr); + HInductionVarAnalysis::InductionInfo cancelled_trip( + trip->induction_class, trip->operation, &neg, trip->op_b, nullptr); + return SubValue(Value(0), GetVal(&cancelled_trip, trip, in_body, !is_min)); + } + } + } + } + } + // General rule of linear induction a * i + b, for normalized 0 <= i < TC. + return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min)); +} + InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, bool in_body, - bool is_min) { + bool is_min) const { // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes // more likely range analysis will compare the same instructions as terminal nodes. int32_t value; @@ -202,6 +260,8 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, } else if (IsIntAndGet(instruction->InputAt(1), &value)) { return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value)); } + } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { + return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); } else if (is_min) { // Special case for finding minimum: minimum of trip-count in loop-body is 1. 
if (trip != nullptr && in_body && instruction == trip->op_a->fetch) { @@ -214,7 +274,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, bool in_body, - bool is_min) { + bool is_min) const { if (info != nullptr) { switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: @@ -253,13 +313,11 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct break; } break; - case HInductionVarAnalysis::kLinear: - // Linear induction a * i + b, for normalized 0 <= i < TC. - return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min), - GetVal(info->op_b, trip, in_body, is_min)); + case HInductionVarAnalysis::kLinear: { + return GetLinear(info, trip, in_body, is_min); + } case HInductionVarAnalysis::kWrapAround: case HInductionVarAnalysis::kPeriodic: - // Merge values in the wrap-around/periodic. return MergeVal(GetVal(info->op_a, trip, in_body, is_min), GetVal(info->op_b, trip, in_body, is_min), is_min); } @@ -271,11 +329,17 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, bool in_body, - bool is_min) { + bool is_min) const { Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + // Try to refine certain failure. + if (v1_min.a_constant && v1_max.a_constant) { + v1_min = RefineOuter(v1_min, /* is_min */ true); + v1_max = RefineOuter(v1_max, /* is_min */ false); + } + // Positive or negative range? if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. 
if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { @@ -285,7 +349,7 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct return is_min ? MulValue(v1_max, v2_min) : MulValue(v1_min, v2_max); } - } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) { + } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { // Negative range vs. positive or negative range. if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { return is_min ? MulValue(v1_min, v2_max) @@ -302,11 +366,12 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, bool in_body, - bool is_min) { + bool is_min) const { Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + // Positive or negative range? if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { @@ -316,7 +381,7 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return is_min ? DivValue(v1_max, v2_max) : DivValue(v1_min, v2_min); } - } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) { + } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) { // Negative range vs. positive or negative range. if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { return is_min ? 
DivValue(v1_min, v2_min) @@ -329,19 +394,23 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return Value(); } -bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) { - Value v_min = GetVal(info, nullptr, false, /* is_min */ true); - Value v_max = GetVal(info, nullptr, false, /* is_min */ false); - if (v_min.is_known && v_max.is_known) { - if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) { - *value = v_min.b_constant; +bool InductionVarRange::IsConstantRange(HInductionVarAnalysis::InductionInfo* info, + int32_t *min_value, + int32_t *max_value) const { + bool in_body = true; // no known trip count + Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true); + Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false); + do { + if (v_min.is_known && v_min.a_constant == 0 && v_max.is_known && v_max.a_constant == 0) { + *min_value = v_min.b_constant; + *max_value = v_max.b_constant; return true; } - } + } while (RefineOuter(&v_min, &v_max)); return false; } -InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) { +InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) { const int32_t b = v1.b_constant + v2.b_constant; if (v1.a_constant == 0) { @@ -355,7 +424,7 @@ InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) { return Value(); } -InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) { +InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) { const int32_t b = v1.b_constant - v2.b_constant; if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) { @@ -369,7 +438,7 @@ InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) { return Value(); } 
-InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) { +InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known) { if (v1.a_constant == 0) { if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) { @@ -384,7 +453,7 @@ InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) { return Value(); } -InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) { +InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) { if (IsSafeDiv(v1.b_constant, v2.b_constant)) { return Value(v1.b_constant / v2.b_constant); @@ -393,7 +462,7 @@ InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) { return Value(); } -InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) { +InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) const { if (v1.is_known && v2.is_known) { if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { return Value(v1.instruction, v1.a_constant, @@ -404,6 +473,25 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is return Value(); } +InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const { + if (v.instruction != nullptr) { + HLoopInformation* loop = + v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + // Set up loop information. + bool in_body = true; // use is always in body of outer loop + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, v.instruction); + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction()); + // Try to refine "a x instruction + b" with outer loop range information on instruction. 
+ return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), + Value(v.b_constant)); + } + } + return v; +} + bool InductionVarRange::GenerateCode(HInstruction* context, HInstruction* instruction, HGraph* graph, @@ -412,7 +500,7 @@ bool InductionVarRange::GenerateCode(HInstruction* context, /*out*/HInstruction** upper, /*out*/HInstruction** taken_test, /*out*/bool* needs_finite_test, - /*out*/bool* needs_taken_test) { + /*out*/bool* needs_taken_test) const { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop if (loop != nullptr) { // Set up loop information. @@ -425,9 +513,13 @@ bool InductionVarRange::GenerateCode(HInstruction* context, } HInductionVarAnalysis::InductionInfo* trip = induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Determine what tests are needed. + // Determine what tests are needed. A finite test is needed if the evaluation code uses the + // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" + // the computed range). A taken test is needed for any unknown trip-count, even if evaluation + // code does not use the trip-count explicitly (since there could be an implicit relation + // between e.g. an invariant subscript and a not-taken condition). *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); - *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip); + *needs_taken_test = IsBodyTripCount(trip); // Code generation for taken test: generate the code when requested or otherwise analyze // if code generation is feasible when taken test is needed. if (taken_test != nullptr) { @@ -456,7 +548,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, HBasicBlock* block, /*out*/HInstruction** result, bool in_body, - bool is_min) { + bool is_min) const { if (info != nullptr) { // Handle current operation. 
Primitive::Type type = Primitive::kPrimInt; @@ -512,10 +604,13 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } break; case HInductionVarAnalysis::kFetch: - if (graph != nullptr) { - *result = info->fetch; // already in HIR + if (info->fetch->GetType() == type) { + if (graph != nullptr) { + *result = info->fetch; // already in HIR + } + return true; } - return true; + break; case HInductionVarAnalysis::kTripCountInLoop: case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! @@ -545,29 +640,43 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } break; case HInductionVarAnalysis::kLinear: { - // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only - // to avoid arithmetic wrap-around situations that are hard to guard against. - int32_t stride_value = 0; - if (GetConstant(info->op_a, &stride_value)) { - if (stride_value == 1 || stride_value == -1) { - const bool is_min_a = stride_value == 1 ? is_min : !is_min; - if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { - if (graph != nullptr) { - HInstruction* oper; - if (stride_value == 1) { - oper = new (graph->GetArena()) HAdd(type, opa, opb); - } else { - oper = new (graph->GetArena()) HSub(type, opb, opa); - } - *result = Insert(block, oper); + // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only + // to avoid arithmetic wrap-around situations that are hard to guard against. + int32_t min_value = 0; + int32_t stride_value = 0; + if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) { + if (stride_value == 1 || stride_value == -1) { + const bool is_min_a = stride_value == 1 ? 
is_min : !is_min; + if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && + GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { + if (graph != nullptr) { + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else { + oper = new (graph->GetArena()) HSub(type, opb, opa); } - return true; + *result = Insert(block, oper); } + return true; } } } break; + } + case HInductionVarAnalysis::kWrapAround: + case HInductionVarAnalysis::kPeriodic: { + // Wrap-around and periodic inductions are restricted to constants only, so that extreme + // values are easy to test at runtime without complications of arithmetic wrap-around. + Value extreme = GetVal(info, trip, in_body, is_min); + if (extreme.is_known && extreme.a_constant == 0) { + if (graph != nullptr) { + *result = graph->GetIntConstant(extreme.b_constant); + } + return true; + } + break; + } default: break; } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 7984871b08..974b8fba06 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -68,6 +68,9 @@ class InductionVarRange { /*out*/Value* max_val, /*out*/bool* needs_finite_test); + /** Refines the values with induction of next outer loop. Returns true on change. */ + bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const; + /** * Returns true if range analysis is able to generate code for the lower and upper * bound expressions on the instruction in the given context. The need_finite_test @@ -113,40 +116,48 @@ class InductionVarRange { /*out*/HInstruction** taken_test); private: - // - // Private helper methods. 
- // - - static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info); - static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip); - static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip); - - static Value GetFetch(HInstruction* instruction, - HInductionVarAnalysis::InductionInfo* trip, - bool in_body, - bool is_min); - static Value GetVal(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip, - bool in_body, - bool is_min); - static Value GetMul(HInductionVarAnalysis::InductionInfo* info1, - HInductionVarAnalysis::InductionInfo* info2, - HInductionVarAnalysis::InductionInfo* trip, - bool in_body, - bool is_min); - static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, - HInductionVarAnalysis::InductionInfo* info2, - HInductionVarAnalysis::InductionInfo* trip, - bool in_body, - bool is_min); - - static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value); - - static Value AddValue(Value v1, Value v2); - static Value SubValue(Value v1, Value v2); - static Value MulValue(Value v1, Value v2); - static Value DivValue(Value v1, Value v2); - static Value MergeVal(Value v1, Value v2, bool is_min); + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; + bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; + bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; + + Value GetLinear(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value GetFetch(HInstruction* instruction, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value GetVal(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value GetMul(HInductionVarAnalysis::InductionInfo* info1, + HInductionVarAnalysis::InductionInfo* info2, + 
HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, + HInductionVarAnalysis::InductionInfo* info2, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + + bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, + int32_t *min_value, + int32_t *max_value) const; + + Value AddValue(Value v1, Value v2) const; + Value SubValue(Value v1, Value v2) const; + Value MulValue(Value v1, Value v2) const; + Value DivValue(Value v1, Value v2) const; + Value MergeVal(Value v1, Value v2, bool is_min) const; + + /** + * Returns refined value using induction of next outer loop or the input value if no + * further refinement is possible. + */ + Value RefineOuter(Value val, bool is_min) const; /** * Generates code for lower/upper/taken-test in the HIR. Returns true on success. @@ -161,15 +172,15 @@ class InductionVarRange { /*out*/HInstruction** upper, /*out*/HInstruction** taken_test, /*out*/bool* needs_finite_test, - /*out*/bool* needs_taken_test); - - static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** result, - bool in_body, - bool is_min); + /*out*/bool* needs_taken_test) const; + + bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** result, + bool in_body, + bool is_min) const; /** Results of prior induction variable analysis. 
*/ HInductionVarAnalysis *induction_analysis_; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index c2ba157ed8..5c0bdd7c4c 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -31,9 +31,12 @@ using Value = InductionVarRange::Value; */ class InductionVarRangeTest : public testing::Test { public: - InductionVarRangeTest() : pool_(), allocator_(&pool_) { - graph_ = CreateGraph(&allocator_); - iva_ = new (&allocator_) HInductionVarAnalysis(graph_); + InductionVarRangeTest() + : pool_(), + allocator_(&pool_), + graph_(CreateGraph(&allocator_)), + iva_(new (&allocator_) HInductionVarAnalysis(graph_)), + range_(iva_) { BuildGraph(); } @@ -59,6 +62,11 @@ class InductionVarRangeTest : public testing::Test { graph_->AddBlock(exit_block_); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); + // Two parameters. + x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(x_); + y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(y_); } /** Constructs loop with given upper bound. */ @@ -103,7 +111,7 @@ class InductionVarRangeTest : public testing::Test { exit_block_->AddInstruction(new (&allocator_) HExit()); } - /** Performs induction variable analysis. */ + /** Constructs SSA and performs induction variable analysis. 
*/ void PerformInductionVarAnalysis() { ASSERT_TRUE(graph_->TryBuildingSsa()); iva_->Run(); @@ -180,49 +188,51 @@ class InductionVarRangeTest : public testing::Test { // bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { - return InductionVarRange::NeedsTripCount(info); + return range_.NeedsTripCount(info); } bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { - return InductionVarRange::IsBodyTripCount(trip); + return range_.IsBodyTripCount(trip); } bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { - return InductionVarRange::IsUnsafeTripCount(trip); + return range_.IsUnsafeTripCount(trip); } Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); + return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false); + return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min); } Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); } - bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) { - return InductionVarRange::GetConstant(info, value); + bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info, + int32_t* min_value, + int32_t* 
max_value) { + return range_.IsConstantRange(info, min_value, max_value); } - Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); } - Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); } - Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); } - Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); } - Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); } - Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); } + Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); } + Value SubValue(Value v1, Value v2) { return range_.SubValue(v1, v2); } + Value MulValue(Value v1, Value v2) { return range_.MulValue(v1, v2); } + Value DivValue(Value v1, Value v2) { return range_.DivValue(v1, v2); } + Value MinValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, true); } + Value MaxValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, false); } // General building fields. ArenaPool pool_; @@ -232,16 +242,17 @@ class InductionVarRangeTest : public testing::Test { HBasicBlock* exit_block_; HBasicBlock* loop_preheader_; HInductionVarAnalysis* iva_; + InductionVarRange range_; // Instructions. HInstruction* condition_; HInstruction* increment_; - HReturnVoid x_; - HReturnVoid y_; + HInstruction* x_; + HInstruction* y_; }; // -// Tests on static methods. +// Tests on private methods. 
// TEST_F(InductionVarRangeTest, TripCountProperties) { @@ -274,14 +285,14 @@ TEST_F(InductionVarRangeTest, GetMinMaxAdd) { GetMin(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr)); ExpectEqual(Value(22), GetMax(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr)); - ExpectEqual(Value(&x_, 1, -20), - GetMin(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr)); - ExpectEqual(Value(&x_, 1, -10), - GetMax(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr)); - ExpectEqual(Value(&x_, 1, 10), - GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr)); - ExpectEqual(Value(&x_, 1, 20), - GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr)); + ExpectEqual(Value(x_, 1, -20), + GetMin(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr)); + ExpectEqual(Value(x_, 1, -10), + GetMax(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr)); + ExpectEqual(Value(x_, 1, 10), + GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr)); + ExpectEqual(Value(x_, 1, 20), + GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr)); ExpectEqual(Value(5), GetMin(CreateInvariant('+', CreateRange(-5, -1), CreateRange(10, 20)), nullptr)); ExpectEqual(Value(19), @@ -293,14 +304,14 @@ TEST_F(InductionVarRangeTest, GetMinMaxSub) { GetMin(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr)); ExpectEqual(Value(-8), GetMax(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr)); - ExpectEqual(Value(&x_, 1, 10), - GetMin(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr)); - ExpectEqual(Value(&x_, 1, 20), - GetMax(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr)); - ExpectEqual(Value(&x_, -1, 10), - GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr)); - ExpectEqual(Value(&x_, -1, 20), - GetMax(CreateInvariant('-', 
CreateRange(10, 20), CreateFetch(&x_)), nullptr)); + ExpectEqual(Value(x_, 1, 10), + GetMin(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr)); + ExpectEqual(Value(x_, 1, 20), + GetMax(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr)); + ExpectEqual(Value(x_, -1, 10), + GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr)); + ExpectEqual(Value(x_, -1, 20), + GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr)); ExpectEqual(Value(-25), GetMin(CreateInvariant('-', CreateRange(-5, -1), CreateRange(10, 20)), nullptr)); ExpectEqual(Value(-11), @@ -312,8 +323,8 @@ TEST_F(InductionVarRangeTest, GetMinMaxNeg) { ExpectEqual(Value(-10), GetMax(CreateInvariant('n', nullptr, CreateRange(10, 20)), nullptr)); ExpectEqual(Value(10), GetMin(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr)); ExpectEqual(Value(20), GetMax(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr)); - ExpectEqual(Value(&x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr)); - ExpectEqual(Value(&x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr)); + ExpectEqual(Value(x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr)); + ExpectEqual(Value(x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr)); } TEST_F(InductionVarRangeTest, GetMinMaxMul) { @@ -336,8 +347,8 @@ TEST_F(InductionVarRangeTest, GetMinMaxConstant) { } TEST_F(InductionVarRangeTest, GetMinMaxFetch) { - ExpectEqual(Value(&x_, 1, 0), GetMin(CreateFetch(&x_), nullptr)); - ExpectEqual(Value(&x_, 1, 0), GetMax(CreateFetch(&x_), nullptr)); + ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(x_), nullptr)); + ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(x_), nullptr)); } TEST_F(InductionVarRangeTest, GetMinMaxLinear) { @@ -364,45 +375,70 @@ TEST_F(InductionVarRangeTest, GetMinMaxPeriodic) { TEST_F(InductionVarRangeTest, GetMulMin) { 
ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true)); ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true)); + ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true)); ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true)); ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true)); + ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), true)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), true)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), true)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), true)); } TEST_F(InductionVarRangeTest, GetMulMax) { ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false)); ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false)); + ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false)); ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false)); ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false)); + ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), false)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), false)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), false)); + ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), false)); } TEST_F(InductionVarRangeTest, GetDivMin) { ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true)); ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true)); + ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true)); ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true)); ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true)); + ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), 
CreateRange(-1, 1), true)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), true)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), true)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), true)); } TEST_F(InductionVarRangeTest, GetDivMax) { ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false)); ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false)); + ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false)); ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false)); ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false)); + ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), false)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), false)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, 40), false)); + ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false)); } -TEST_F(InductionVarRangeTest, GetConstant) { - int32_t value; - ASSERT_TRUE(GetConstant(CreateConst(12345), &value)); - EXPECT_EQ(12345, value); - EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value)); +TEST_F(InductionVarRangeTest, IsConstantRange) { + int32_t min_value; + int32_t max_value; + ASSERT_TRUE(IsConstantRange(CreateConst(12345), &min_value, &max_value)); + EXPECT_EQ(12345, min_value); + EXPECT_EQ(12345, max_value); + ASSERT_TRUE(IsConstantRange(CreateRange(1, 2), &min_value, &max_value)); + EXPECT_EQ(1, min_value); + EXPECT_EQ(2, max_value); + EXPECT_FALSE(IsConstantRange(CreateFetch(x_), &min_value, &max_value)); } TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(110), AddValue(Value(10), Value(100))); - ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1))); - ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(), AddValue(Value(&x_, 
1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1))); + ExpectEqual(Value(x_, 3, -5), AddValue(Value(x_, 2, -4), Value(x_, 1, -1))); + ExpectEqual(Value(), AddValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(x_, 1, 23), AddValue(Value(x_, 1, 20), Value(3))); + ExpectEqual(Value(y_, 1, 5), AddValue(Value(55), Value(y_, 1, -50))); const int32_t max_value = std::numeric_limits<int32_t>::max(); ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5))); ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6))); // unsafe @@ -410,11 +446,11 @@ TEST_F(InductionVarRangeTest, AddValue) { TEST_F(InductionVarRangeTest, SubValue) { ExpectEqual(Value(-90), SubValue(Value(10), Value(100))); - ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50))); + ExpectEqual(Value(-3), SubValue(Value(x_, 1, -4), Value(x_, 1, -1))); + ExpectEqual(Value(x_, 2, -3), SubValue(Value(x_, 3, -4), Value(x_, 1, -1))); + ExpectEqual(Value(), SubValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(x_, 1, 17), SubValue(Value(x_, 1, 20), Value(3))); + ExpectEqual(Value(y_, -4, 105), SubValue(Value(55), Value(y_, 4, -50))); const int32_t min_value = std::numeric_limits<int32_t>::min(); ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5))); ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6))); // unsafe @@ -422,136 +458,140 @@ TEST_F(InductionVarRangeTest, SubValue) { TEST_F(InductionVarRangeTest, MulValue) { ExpectEqual(Value(1000), 
MulValue(Value(10), Value(100))); - ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3))); - ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2))); + ExpectEqual(Value(), MulValue(Value(x_, 1, -4), Value(x_, 1, -1))); + ExpectEqual(Value(), MulValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(x_, 9, 60), MulValue(Value(x_, 3, 20), Value(3))); + ExpectEqual(Value(y_, 55, -110), MulValue(Value(55), Value(y_, 1, -2))); ExpectEqual(Value(), MulValue(Value(90000), Value(-90000))); // unsafe } TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(25), DivValue(Value(100), Value(4))); - ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3))); - ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1))); + ExpectEqual(Value(), DivValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(), DivValue(Value(x_, 12, 24), Value(3))); + ExpectEqual(Value(), DivValue(Value(55), Value(y_, 1, -50))); ExpectEqual(Value(), DivValue(Value(1), Value(0))); // unsafe } TEST_F(InductionVarRangeTest, MinValue) { ExpectEqual(Value(10), MinValue(Value(10), Value(100))); - ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1))); - ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1))); + ExpectEqual(Value(x_, 4, -4), MinValue(Value(x_, 4, -4), Value(x_, 4, 
-1))); + ExpectEqual(Value(), MinValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(), MinValue(Value(x_, 1, 20), Value(3))); + ExpectEqual(Value(), MinValue(Value(55), Value(y_, 1, -50))); } TEST_F(InductionVarRangeTest, MaxValue) { ExpectEqual(Value(100), MaxValue(Value(10), Value(100))); - ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1))); - ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(x_, 1, -1), MaxValue(Value(x_, 1, -4), Value(x_, 1, -1))); + ExpectEqual(Value(x_, 4, -1), MaxValue(Value(x_, 4, -4), Value(x_, 4, -1))); + ExpectEqual(Value(), MaxValue(Value(x_, 1, 5), Value(y_, 1, -7))); + ExpectEqual(Value(), MaxValue(Value(x_, 1, 20), Value(3))); + ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50))); } // -// Tests on instance methods. +// Tests on public methods. // TEST_F(InductionVarRangeTest, ConstantTripCountUp) { BuildLoop(0, graph_->GetIntConstant(1000), 1); PerformInductionVarAnalysis(); - InductionVarRange range(iva_); Value v1, v2; bool needs_finite_test = true; // In context of header: known. - range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); - range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); + range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, ConstantTripCountDown) { BuildLoop(1000, graph_->GetIntConstant(0), -1); PerformInductionVarAnalysis(); - InductionVarRange range(iva_); Value v1, v2; bool needs_finite_test = true; // In context of header: known. - range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); - range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); + range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); - entry_block_->AddInstruction(parameter); - BuildLoop(0, parameter, 1); + BuildLoop(0, x_, 1); PerformInductionVarAnalysis(); - InductionVarRange range(iva_); Value v1, v2; bool needs_finite_test = true; bool needs_taken_test = true; // In context of header: upper unknown. - range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); - ExpectEqual(Value(parameter, 1, -1), v2); - range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + ExpectEqual(Value(x_, 1, -1), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); + range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); - ExpectEqual(Value(parameter, 1, 0), v2); + ExpectEqual(Value(x_, 1, 0), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range.CanGenerateCode( + EXPECT_FALSE(range_.CanGenerateCode( condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); - ASSERT_TRUE(range.CanGenerateCode( + ASSERT_TRUE(range_.CanGenerateCode( increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); EXPECT_FALSE(needs_finite_test); EXPECT_TRUE(needs_taken_test); // Generates code. - range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + range_.GenerateRangeCode( + increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); @@ -572,7 +612,7 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); // Verify taken-test is 0<V. 
- range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsLessThan()); ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); @@ -581,49 +621,49 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { } TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); - entry_block_->AddInstruction(parameter); - BuildLoop(1000, parameter, -1); + BuildLoop(1000, x_, -1); PerformInductionVarAnalysis(); - InductionVarRange range(iva_); Value v1, v2; bool needs_finite_test = true; bool needs_taken_test = true; // In context of header: lower unknown. - range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(), v1); ExpectEqual(Value(1000), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); - ExpectEqual(Value(parameter, 1, 1), v1); + ExpectEqual(Value(x_, 1, 1), v1); ExpectEqual(Value(1000), v2); - range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); + range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); - ExpectEqual(Value(parameter, 1, 0), v1); + ExpectEqual(Value(x_, 1, 0), v1); ExpectEqual(Value(999), v2); + EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range.CanGenerateCode( + EXPECT_FALSE(range_.CanGenerateCode( condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); - ASSERT_TRUE(range.CanGenerateCode( + ASSERT_TRUE(range_.CanGenerateCode( increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); EXPECT_FALSE(needs_finite_test); EXPECT_TRUE(needs_taken_test); // Generates code. - range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + range_.GenerateRangeCode( + increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); - // Verify lower is 1000-(-(V-1000)-1). + // Verify lower is 1000-((1000-V)-1). 
ASSERT_TRUE(lower != nullptr); ASSERT_TRUE(lower->IsSub()); ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); @@ -633,12 +673,10 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue()); lower = lower->InputAt(0); - ASSERT_TRUE(lower->IsNeg()); - lower = lower->InputAt(0); ASSERT_TRUE(lower->IsSub()); - EXPECT_TRUE(lower->InputAt(0)->IsParameterValue()); - ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); - EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue()); + ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(lower->InputAt(1)->IsParameterValue()); // Verify upper is 1000-0. ASSERT_TRUE(upper != nullptr); @@ -649,7 +687,7 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); // Verify taken-test is 1000>V. - range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsGreaterThan()); ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 0363f203b2..a4dcb3aeba 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -171,13 +171,37 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, const DexFile& dex_file, uint32_t referrer_index) SHARED_REQUIRES(Locks::mutator_lock_) { - if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + if (IsSameDexFile(*method->GetDexFile(), dex_file)) { return method->GetDexMethodIndex(); } else { return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); } } +static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file) + SHARED_REQUIRES(Locks::mutator_lock_) { + if 
(cls->GetDexCache() == nullptr) { + DCHECK(cls->IsArrayClass()); + // TODO: find the class in `dex_file`. + return DexFile::kDexNoIndex; + } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) { + // TODO: deal with proxy classes. + return DexFile::kDexNoIndex; + } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) { + // Update the dex cache to ensure the class is in. The generated code will + // consider it is. We make it safe by updating the dex cache, as other + // dex files might also load the class, and there is no guarantee the dex + // cache of the dex file of the class will be updated. + if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) { + cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls); + } + return cls->GetDexTypeIndex(); + } else { + // TODO: find the class in `dex_file`. + return DexFile::kDexNoIndex; + } +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved()) { return false; // Don't bother to move further if we know the method is unresolved. @@ -192,6 +216,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { // We can query the dex cache directly. The verifier has populated it already. ArtMethod* resolved_method; if (invoke_instruction->IsInvokeStaticOrDirect()) { + if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) { + VLOG(compiler) << "Not inlining a String.<init> method"; + return false; + } MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod(); mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file) ? 
caller_compilation_unit_.GetDexCache().Get() @@ -210,53 +238,176 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } - if (!invoke_instruction->IsInvokeStaticOrDirect()) { - resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); - if (resolved_method == nullptr) { + if (invoke_instruction->IsInvokeStaticOrDirect()) { + return TryInline(invoke_instruction, resolved_method); + } + + // Check if we can statically find the method. + ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); + if (actual_method != nullptr) { + return TryInline(invoke_instruction, actual_method); + } + + // Check if we can use an inline cache. + ArtMethod* caller = graph_->GetArtMethod(); + size_t pointer_size = class_linker->GetImagePointerSize(); + // Under JIT, we should always know the caller. + DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr)); + if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) { + ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size); + const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()); + if (ic.IsUnitialized()) { VLOG(compiler) << "Interface or virtual call to " << PrettyMethod(method_index, caller_dex_file) - << " could not be statically determined"; + << " is not hit and not inlined"; return false; - } - // We have found a method, but we need to find where that method is for the caller's - // dex file. 
- method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); - if (method_index == DexFile::kDexNoIndex) { + } else if (ic.IsMonomorphic()) { + MaybeRecordStat(kMonomorphicCall); + return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic); + } else if (ic.IsPolymorphic()) { + MaybeRecordStat(kPolymorphicCall); + return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic); + } else { + DCHECK(ic.IsMegamorphic()); VLOG(compiler) << "Interface or virtual call to " - << PrettyMethod(resolved_method) - << " cannot be inlined because unaccessible to caller"; + << PrettyMethod(method_index, caller_dex_file) + << " is megamorphic and not inlined"; + MaybeRecordStat(kMegamorphicCall); return false; } } - bool same_dex_file = - IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile()); + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(method_index, caller_dex_file) + << " could not be statically determined"; + return false; +} - const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); +bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + const InlineCache& ic) { + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file); + if (class_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) + << " from inline cache is not inlined because its class is not" + << " accessible to the caller"; + return false; + } + + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + size_t pointer_size = class_linker->GetImagePointerSize(); + if (invoke_instruction->IsInvokeInterface()) { + resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface( + resolved_method, pointer_size); + } else { + DCHECK(invoke_instruction->IsInvokeVirtual()); + resolved_method = 
ic.GetMonomorphicType()->FindVirtualMethodForVirtual( + resolved_method, pointer_size); + } + DCHECK(resolved_method != nullptr); + HInstruction* receiver = invoke_instruction->InputAt(0); + HInstruction* cursor = invoke_instruction->GetPrevious(); + HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); + + if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) { + return false; + } + + // We successfully inlined, now add a guard. + ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); + HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet( + receiver, + Primitive::kPrimNot, + field->GetOffset(), + field->IsVolatile(), + field->GetDexFieldIndex(), + field->GetDeclaringClass()->GetDexClassDefIndex(), + *field->GetDexFile(), + handles_->NewHandle(field->GetDexCache()), + invoke_instruction->GetDexPc()); + + bool is_referrer = + (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(), + class_index, + caller_dex_file, + is_referrer, + invoke_instruction->GetDexPc(), + /* needs_access_check */ false, + /* is_in_dex_cache */ true); + + HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get); + HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( + compare, invoke_instruction->GetDexPc()); + // TODO: Extend reference type propagation to understand the guard. 
+ if (cursor != nullptr) { + bb_cursor->InsertInstructionAfter(load_class, cursor); + } else { + bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction()); + } + bb_cursor->InsertInstructionAfter(field_get, load_class); + bb_cursor->InsertInstructionAfter(compare, field_get); + bb_cursor->InsertInstructionAfter(deoptimize, compare); + deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + + // Run type propagation to get the guard typed, and eventually propagate the + // type of the receiver. + ReferenceTypePropagation rtp_fixup(graph_, handles_); + rtp_fixup.Run(); + + MaybeRecordStat(kInlinedMonomorphicCall); + return true; +} + +bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED, + ArtMethod* resolved_method, + const InlineCache& ic ATTRIBUTE_UNUSED) { + // TODO + VLOG(compiler) << "Unimplemented polymorphic inlining for " + << PrettyMethod(resolved_method); + return false; +} + +bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) { + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + uint32_t method_index = FindMethodIndexIn( + method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Call to " + << PrettyMethod(method) + << " cannot be inlined because unaccessible to caller"; + return false; + } + + bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); + + const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method) << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - VLOG(compiler) << 
"Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method) << " is too big to inline"; return false; } if (code_item->tries_size_ != 0) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method) << " is not inlined because of try block"; return false; } - if (!resolved_method->GetDeclaringClass()->IsVerified()) { - uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex(); + if (!method->GetDeclaringClass()->IsVerified()) { + uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); if (!compiler_driver_->IsMethodVerifiedWithoutFailures( - resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) { + method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " couldn't be verified, so it cannot be inlined"; return false; @@ -273,7 +424,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { + if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) { return false; } @@ -284,7 +435,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - bool same_dex_file) { + bool same_dex_file, + bool do_rtp) { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); @@ -337,6 +489,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, invoke_type, graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); + callee_graph->SetArtMethod(resolved_method); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, @@ -418,6 +571,7 @@ bool 
HInliner::TryBuildAndInline(ArtMethod* resolved_method, size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { HInliner inliner(callee_graph, + outermost_graph_, codegen_, outer_compilation_unit_, dex_compilation_unit, @@ -529,9 +683,9 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HNullConstant* null_constant = graph_->GetNullConstant(); if (!null_constant->GetReferenceTypeInfo().IsValid()) { ReferenceTypeInfo::TypeHandle obj_handle = - handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject)); + handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject)); null_constant->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); + ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); } // Check the integrity of reference types and run another type propagation if needed. @@ -550,14 +704,16 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); } - // If the return type is a refinement of the declared type run the type propagation again. - ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); - ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); - if (invoke_rti.IsStrictSupertypeOf(return_rti) - || (return_rti.IsExact() && !invoke_rti.IsExact()) - || !return_replacement->CanBeNull()) { - ReferenceTypePropagation rtp_fixup(graph_, handles_); - rtp_fixup.Run(); + if (do_rtp) { + // If the return type is a refinement of the declared type run the type propagation again. 
+ ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); + ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); + if (invoke_rti.IsStrictSupertypeOf(return_rti) + || (return_rti.IsExact() && !invoke_rti.IsExact()) + || !return_replacement->CanBeNull()) { + ReferenceTypePropagation rtp_fixup(graph_, handles_); + rtp_fixup.Run(); + } } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 0f6a9453be..7b9fb73ccf 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -27,11 +27,13 @@ class CompilerDriver; class DexCompilationUnit; class HGraph; class HInvoke; +class InlineCache; class OptimizingCompilerStats; class HInliner : public HOptimization { public: HInliner(HGraph* outer_graph, + HGraph* outermost_graph, CodeGenerator* codegen, const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, @@ -40,6 +42,7 @@ class HInliner : public HOptimization { OptimizingCompilerStats* stats, size_t depth = 0) : HOptimization(outer_graph, kInlinerPassName, stats), + outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), codegen_(codegen), @@ -54,10 +57,33 @@ class HInliner : public HOptimization { private: bool TryInline(HInvoke* invoke_instruction); + + // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether + // reference type propagation can run after the inlining. + bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true) + SHARED_REQUIRES(Locks::mutator_lock_); + + // Try to inline the target of a monomorphic call. If successful, the code + // in the graph will look like: + // if (receiver.getClass() != ic.GetMonomorphicType()) deopt + // ... 
// inlined code + bool TryInlineMonomorphicCall(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + const InlineCache& ic) + SHARED_REQUIRES(Locks::mutator_lock_); + + // Try to inline targets of a polymorphic call. Currently unimplemented. + bool TryInlinePolymorphicCall(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + const InlineCache& ic) + SHARED_REQUIRES(Locks::mutator_lock_); + bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - bool same_dex_file); + bool same_dex_file, + bool do_rtp = true); + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2f3df7fc68..c504ded54c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -39,6 +39,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { } } + bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotate(HBinaryOperation* instruction); + bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl); + bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop); void VisitShift(HBinaryOperation* shift); @@ -77,6 +83,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; + void SimplifyRotate(HInvoke* invoke, bool is_left); void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); @@ -173,6 +180,161 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { } } +static bool 
IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) { + return (sub->GetRight() == other && + sub->GetLeft()->IsConstant() && + (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0); +} + +bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(), + ushr->GetLeft(), + ushr->GetRight()); + op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror); + if (!ushr->HasUses()) { + ushr->GetBlock()->RemoveInstruction(ushr); + } + if (!ushr->GetRight()->HasUses()) { + ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight()); + } + if (!shl->HasUses()) { + shl->GetBlock()->RemoveInstruction(shl); + } + if (!shl->GetRight()->HasUses()) { + shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight()); + } + return true; +} + +// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. +bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { + // This simplification is currently supported on x86, x86_64, ARM and ARM64. + // TODO: Implement it for MIPS/64. + const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); + switch (instruction_set) { + case kArm: + case kArm64: + case kThumb2: + case kX86: + case kX86_64: + break; + default: + return false; + } + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + HInstruction* left = op->GetLeft(); + HInstruction* right = op->GetRight(); + // If we have an UShr and a Shl (in either order). + if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) { + HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr(); + HShl* shl = left->IsShl() ? 
left->AsShl() : right->AsShl(); + DCHECK(Primitive::IsIntOrLongType(ushr->GetType())); + if (ushr->GetType() == shl->GetType() && + ushr->GetLeft() == shl->GetLeft()) { + if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) { + // Shift distances are both constant, try replacing with Ror if they + // add up to the register size. + return TryReplaceWithRotateConstantPattern(op, ushr, shl); + } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) { + // Shift distances are potentially of the form x and (reg_size - x). + return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl); + } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) { + // Shift distances are potentially of the form d and -d. + return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl); + } + } + } + return false; +} + +// Try replacing code looking like (x >>> #rdist OP x << #ldist): +// UShr dst, x, #rdist +// Shl tmp, x, #ldist +// OP dst, dst, tmp +// or like (x >>> #rdist OP x << #-ldist): +// UShr dst, x, #rdist +// Shl tmp, x, #-ldist +// OP dst, dst, tmp +// with +// Ror dst, x, #rdist +bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant()); + size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant()); + if (((ldist + rdist) & (reg_bits - 1)) == 0) { + ReplaceRotateWithRor(op, ushr, shl); + return true; + } + return false; +} + +// Replace code looking like (x >>> -d OP x << d): +// Neg neg, d +// UShr dst, x, neg +// Shl tmp, x, d +// OP dst, dst, tmp +// with +// Neg neg, d +// Ror dst, x, neg +// *** OR *** +// Replace code looking like (x >>> d OP x << -d): +// UShr dst, x, d +// Neg neg, d +// Shl tmp, x, neg +// OP dst, dst, tmp +// with +// Ror dst, x, d +bool 
InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()); + bool neg_is_left = shl->GetRight()->IsNeg(); + HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg(); + // And the shift distance being negated is the distance being shifted the other way. + if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) { + ReplaceRotateWithRor(op, ushr, shl); + } + return false; +} + +// Try replacing code looking like (x >>> d OP x << (#bits - d)): +// UShr dst, x, d +// Sub ld, #bits, d +// Shl tmp, x, ld +// OP dst, dst, tmp +// with +// Ror dst, x, d +// *** OR *** +// Replace code looking like (x >>> (#bits - d) OP x << d): +// Sub rd, #bits, d +// UShr dst, x, rd +// Shl tmp, x, d +// OP dst, dst, tmp +// with +// Neg neg, d +// Ror dst, x, neg +bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, + HUShr* ushr, + HShl* shl) { + DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); + DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()); + size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + HInstruction* shl_shift = shl->GetRight(); + HInstruction* ushr_shift = ushr->GetRight(); + if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) || + (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) { + return ReplaceRotateWithRor(op, ushr, shl); + } + return false; +} + void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { HInstruction* obj = null_check->InputAt(0); if (!obj->CanBeNull()) { @@ -530,7 +692,10 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); 
neg->GetBlock()->RemoveInstruction(neg); + return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -612,13 +777,6 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // Try to fold an HCompare into this HCondition. - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS and MIPS64. - InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - if (instruction_set == kMips || instruction_set == kMips64) { - return; - } - HInstruction* left = condition->GetLeft(); HInstruction* right = condition->GetRight(); // We can only replace an HCondition which compares a Compare to 0. @@ -906,7 +1064,10 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { // src instruction->ReplaceWith(instruction->GetLeft()); instruction->GetBlock()->RemoveInstruction(instruction); + return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { @@ -1027,6 +1188,8 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { RecordSimplification(); return; } + + TryReplaceWithRotate(instruction); } void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) { @@ -1095,6 +1258,42 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { } } +void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); + // This simplification is currently supported on x86, x86_64, ARM and ARM64. + // TODO: Implement it for MIPS/64. 
+ const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); + switch (instruction_set) { + case kArm: + case kArm64: + case kThumb2: + case kX86: + case kX86_64: + break; + default: + return; + } + HInstruction* value = invoke->InputAt(0); + HInstruction* distance = invoke->InputAt(1); + // Replace the invoke with an HRor. + if (is_left) { + distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance); + invoke->GetBlock()->InsertInstructionBefore(distance, invoke); + } + HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror); + // Remove ClinitCheck and LoadClass, if possible. + HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1); + if (clinit->IsClinitCheck() && !clinit->HasUses()) { + clinit->GetBlock()->RemoveInstruction(clinit); + HInstruction* ldclass = clinit->InputAt(0); + if (ldclass->IsLoadClass() && !ldclass->HasUses()) { + ldclass->GetBlock()->RemoveInstruction(ldclass); + } + } +} + static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) { if (potential_length->IsArrayLength()) { return potential_length->InputAt(0) == potential_array; @@ -1165,6 +1364,12 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifyStringEquals(instruction); } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) { SimplifySystemArrayCopy(instruction); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight || + instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) { + SimplifyRotate(instruction, false); + } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft || + instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) { + SimplifyRotate(instruction, true); } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..6a34b13320 100644 --- 
a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -16,11 +16,16 @@ #include "instruction_simplifier_arm64.h" +#include "common_arm64.h" #include "mirror/array-inl.h" namespace art { namespace arm64 { +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; +using helpers::ShifterOperandSupportsExtension; + void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, @@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +/* Checks whether `bitfield_op` (an instruction accepted by CanFitInShifterOperand()) can be folded into the shifter operand of `use`; when `do_merge` is true the fold is also performed. Returns false, leaving the graph untouched, if `use` is not of int/long type, if both inputs of `use` are the same instruction, if `bitfield_op` is the left input of a non-commutative operation, or if `bitfield_op` is an extension that `use` does not support. Returns true otherwise. */ +bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use)); + DCHECK(use->IsBinaryOperation() || use->IsNeg()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + /* A negation is handled like `0 - x`: a zero constant stands in for the left input. */ + HInstruction* left; + HInstruction* right; + if (use->IsBinaryOperation()) { + left = use->InputAt(0); + right = use->InputAt(1); + } else { + DCHECK(use->IsNeg()); + right = use->AsNeg()->InputAt(0); + left = GetGraph()->GetConstant(right->GetType(), 0); + } + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? 
+ return false; + } + + /* The other input of `use` is passed on unchanged to the merged instruction. When `bitfield_op` is the left input, the two inputs can only be exchanged if the operation is commutative. */ + bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HArm64DataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && + !ShifterOperandSupportsExtension(use)) { + return false; + } + + /* Nothing has been modified up to this point: query-only callers (do_merge == false) skip the rewrite and just get the positive answer. The rewrite replaces `use` and removes `bitfield_op` once it has no uses left. */ + if (do_merge) { + HArm64DataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().IsEmpty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } + RecordSimplification(); + } + + return true; +} + +// Merge a bitfield move instruction into its uses if it can be merged in all of them. +bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + if (!HasShifterOperand(use)) { + return false; + } + if (!CanMergeIntoShifterOperand(use, bitfield_op)) { + return false; + } + } + + // Merge the instruction into its uses. 
+ for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + bool merged = MergeIntoShifterOperand(use, bitfield_op); + DCHECK(merged); + } + + return true; +} + +/* Tries to rewrite `mul`, the product of `input_other` and `input_binop`, as a single HArm64MultiplyAccumulate when `input_binop` is `b + 1`, `b - (-1)` or `1 - b` and `mul` is its only non-environment use. On success `mul` is replaced by the multiply-accumulate, `input_binop` is removed, the simplification is recorded and true is returned. Otherwise the graph is left untouched and false is returned. */ +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. 
+ return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + /* The graph has been rewritten: record it like the other simplifications of this visitor and report success, so that the caller stops and does not retry with the `mul` that was just removed. */ + RecordSimplification(); + + return true; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { Primitive::ComponentSize(instruction->GetComponentType())); } +void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. 
+ DCHECK_NE(binop_left, binop_right); + /* `acc + mul` and `acc - mul` use the left input as accumulator, and `mul + acc` uses the right one. `mul - acc` matches neither branch, leaves `accumulator` null and is therefore not merged here. */ + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + /* Each input that is an add or a sub is tried in turn as the `b <+/-> 1` part, with the other input as the multiplicand `a`; the right input is tried first. */ + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + +/* The shift visitors below only attempt the merge into the users' shifter operand when the shift distance (input 1) is a constant. */ +void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +/* A conversion between two different integral types is also offered to the users' shifter operand; same-type conversions are left alone. */ +void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if 
(input_type == result_type) { + // We let the arch-independent code handle this. + return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* array, HInstruction* index, int access_size); + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + /* Query-only wrapper: calls TryMergeIntoShifterOperand() with do_merge == false. */ + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, false); + } + /* Performing wrapper: calls TryMergeIntoShifterOperand() with do_merge == true; the merge is expected to be possible (checked with a DCHECK). */ + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, true); + } + + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. 
void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b01324ec3b..7127215c51 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -189,6 +189,42 @@ static Intrinsics GetIntrinsic(InlineMethod method) { return ((method.d.data & kIntrinsicFlagMin) == 0) ? Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; + // More math builtins. + case kIntrinsicCos: + return Intrinsics::kMathCos; + case kIntrinsicSin: + return Intrinsics::kMathSin; + case kIntrinsicAcos: + return Intrinsics::kMathAcos; + case kIntrinsicAsin: + return Intrinsics::kMathAsin; + case kIntrinsicAtan: + return Intrinsics::kMathAtan; + case kIntrinsicAtan2: + return Intrinsics::kMathAtan2; + case kIntrinsicCbrt: + return Intrinsics::kMathCbrt; + case kIntrinsicCosh: + return Intrinsics::kMathCosh; + case kIntrinsicExp: + return Intrinsics::kMathExp; + case kIntrinsicExpm1: + return Intrinsics::kMathExpm1; + case kIntrinsicHypot: + return Intrinsics::kMathHypot; + case kIntrinsicLog: + return Intrinsics::kMathLog; + case kIntrinsicLog10: + return Intrinsics::kMathLog10; + case kIntrinsicNextAfter: + return Intrinsics::kMathNextAfter; + case kIntrinsicSinh: + return Intrinsics::kMathSinh; + case kIntrinsicTan: + return Intrinsics::kMathTan; + case kIntrinsicTanh: + return Intrinsics::kMathTanh; + // Misc math. case kIntrinsicSqrt: return Intrinsics::kMathSqrt; @@ -384,7 +420,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile // InvokeStaticOrDirect. 
InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? - invoke->AsInvokeStaticOrDirect()->GetInvokeType() : + invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() : invoke->IsInvokeVirtual() ? kVirtual : kSuper; switch (intrinsic_type) { case kStatic: diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index d2017da221..4683aee603 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -113,10 +113,10 @@ void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -127,10 +127,10 @@ void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -240,178 +240,6 @@ void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); } -static void 
GenIntegerRotate(LocationSummary* locations, - ArmAssembler* assembler, - bool is_left) { - Register in = locations->InAt(0).AsRegister<Register>(); - Location rhs = locations->InAt(1); - Register out = locations->Out().AsRegister<Register>(); - - if (rhs.IsConstant()) { - // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31], - // so map all rotations to a +ve. equivalent in that range. - // (e.g. left *or* right by -2 bits == 30 bits in the same direction.) - uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F; - if (rot) { - // Rotate, mapping left rotations to right equivalents if necessary. - // (e.g. left by 2 bits == right by 30.) - __ Ror(out, in, is_left ? (0x20 - rot) : rot); - } else if (out != in) { - __ Mov(out, in); - } - } else { - if (is_left) { - __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0)); - __ Ror(out, in, out); - } else { - __ Ror(out, in, rhs.AsRegister<Register>()); - } - } -} - -// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer -// rotates by swapping input regs (effectively rotating by the first 32-bits of -// a larger rotation) or flipping direction (thus treating larger right/left -// rotations as sub-word sized rotations in the other direction) as appropriate. -static void GenLongRotate(LocationSummary* locations, - ArmAssembler* assembler, - bool is_left) { - Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Location rhs = locations->InAt(1); - Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>(); - - if (rhs.IsConstant()) { - uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue(); - // Map all left rotations to right equivalents. - if (is_left) { - rot = 0x40 - rot; - } - // Map all rotations to +ve. equivalents on the interval [0,63]. 
- rot &= 0x3F; - // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate - // logic below to a simple pair of binary orr. - // (e.g. 34 bits == in_reg swap + 2 bits right.) - if (rot >= 0x20) { - rot -= 0x20; - std::swap(in_reg_hi, in_reg_lo); - } - // Rotate, or mov to out for zero or word size rotations. - if (rot) { - __ Lsr(out_reg_hi, in_reg_hi, rot); - __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot)); - __ Lsr(out_reg_lo, in_reg_lo, rot); - __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot)); - } else { - __ Mov(out_reg_lo, in_reg_lo); - __ Mov(out_reg_hi, in_reg_hi); - } - } else { - Register shift_left = locations->GetTemp(0).AsRegister<Register>(); - Register shift_right = locations->GetTemp(1).AsRegister<Register>(); - Label end; - Label right; - - __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); - __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6); - __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep); - - if (is_left) { - __ b(&right, CS); - } else { - __ b(&right, CC); - std::swap(shift_left, shift_right); - } - - // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). - // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). - __ Lsl(out_reg_hi, in_reg_hi, shift_left); - __ Lsr(out_reg_lo, in_reg_lo, shift_right); - __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); - __ Lsl(out_reg_lo, in_reg_lo, shift_left); - __ Lsr(shift_left, in_reg_hi, shift_right); - __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); - __ b(&end); - - // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). - // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left). 
- __ Bind(&right); - __ Lsr(out_reg_hi, in_reg_hi, shift_right); - __ Lsl(out_reg_lo, in_reg_lo, shift_left); - __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo)); - __ Lsr(out_reg_lo, in_reg_lo, shift_right); - __ Lsl(shift_right, in_reg_hi, shift_left); - __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); - - __ Bind(&end); - } -} - -void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) { - GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */); -} - -void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - if (invoke->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) { - GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */); -} - -void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, 
Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) { - GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */); -} - -void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - if (invoke->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant())); - } else { - locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - } - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) { - GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */); -} - static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { Location in = locations->InAt(0); Location out = locations->Out(); @@ -429,7 +257,7 @@ void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { @@ -437,7 +265,7 @@ void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -486,7 +314,7 
@@ void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } @@ -495,7 +323,7 @@ void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -526,7 +354,7 @@ void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); } void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { @@ -534,7 +362,7 @@ void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); } void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) { @@ -742,22 +570,22 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) } void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) { - 
GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, @@ -787,31 +615,34 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, } void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); } void 
IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke); + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke); + CreateIntIntIntIntToVoid( + arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke); } static void GenUnsafePut(LocationSummary* locations, @@ -873,31 +704,67 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* 
is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, 
+ /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, @@ -958,8 +825,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ ldrex(tmp_lo, tmp_ptr); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -985,15 +859,17 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. 
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } @@ -1245,7 +1121,8 @@ void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); } void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -1265,7 +1142,8 @@ void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); + GenerateVisitStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); } void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -1644,7 +1522,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { temp2, dest, Register(kNoRegister), - false); + /* can_be_null */ false); __ Bind(slow_path->GetExitLabel()); } @@ -1659,8 +1537,12 @@ void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) @@ -1677,6 +1559,23 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 059abf090d..9f6863cf6e 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. 
+ invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ masm-> @@ -186,10 +202,10 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke } void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -200,10 +216,10 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -314,103 +330,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); } -static void GenRotateRight(LocationSummary* locations, - Primitive::Type type, - vixl::MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); - - Location in = locations->InAt(0); - Location out = locations->Out(); - Operand rhs = OperandFrom(locations->InAt(1), type); - - if (rhs.IsImmediate()) { - uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 
1); - __ Ror(RegisterFrom(out, type), - RegisterFrom(in, type), - shift); - } else { - DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0); - __ Ror(RegisterFrom(out, type), - RegisterFrom(in, type), - rhs.reg()); - } -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) { - GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) { - GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); -} - -static void GenRotateLeft(LocationSummary* locations, - Primitive::Type type, - vixl::MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); - - Location in = locations->InAt(0); - Location out = locations->Out(); - Operand rhs = OperandFrom(locations->InAt(1), type); - - if (rhs.IsImmediate()) { - uint32_t regsize = RegisterFrom(in, type).SizeInBits(); - uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1); - __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift); - } else { - DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0); - __ 
Neg(RegisterFrom(out, type), - Operand(RegisterFrom(locations->InAt(1), type))); - __ Ror(RegisterFrom(out, type), - RegisterFrom(in, type), - RegisterFrom(out, type)); - } -} - -void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) { - GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); -} - -void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) { - GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); -} - static void GenReverse(LocationSummary* locations, Primitive::Type type, vixl::MacroAssembler* masm) { @@ -461,7 +380,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetVIXLAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { @@ -469,7 +388,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - 
MathAbsFP(invoke->GetLocations(), false, GetVIXLAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { @@ -498,7 +417,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), false, GetVIXLAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { @@ -506,7 +425,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), true, GetVIXLAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } static void GenMinMaxFP(LocationSummary* locations, @@ -541,7 +460,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetVIXLAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -549,7 +468,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetVIXLAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -557,7 +476,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void 
IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetVIXLAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -565,7 +484,8 @@ void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetVIXLAssembler()); + GenMinMaxFP( + invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -598,7 +518,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, false, GetVIXLAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { @@ -606,7 +526,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, true, GetVIXLAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { @@ -614,7 +534,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, false, GetVIXLAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { @@ -622,7 +542,7 
@@ void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, true, GetVIXLAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { @@ -698,7 +618,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { - GenMathRound(invoke->GetLocations(), true, GetVIXLAssembler()); + GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { @@ -706,7 +626,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { - GenMathRound(invoke->GetLocations(), false, GetVIXLAssembler()); + GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) { @@ -818,9 +738,12 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. - Register trg = RegisterFrom(locations->Out(), type); + Location base_loc = locations->InAt(1); + Register base = WRegisterFrom(base_loc); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. 
+ Location trg_loc = locations->Out(); + Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); MemOperand mem_op(base.X(), offset); @@ -837,13 +760,18 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -871,22 +799,22 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invok } void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* 
is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { @@ -977,31 +905,67 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + 
Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) { @@ -1057,6 +1021,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (use_acquire_release) { __ Bind(&loop_head); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). 
__ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1064,6 +1031,14 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat } else { __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ Ldxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); @@ -1087,11 +1062,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). + // + // Also, the UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. - // TODO(rpl): Fix it and turn it back on. - if (kPoisonHeapReferences) { + // off temporarily as a quick fix (b/26204023). + // + // TODO(rpl): Fix these two issues and re-enable this intrinsic. 
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } @@ -1345,7 +1326,8 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true); + GenerateVisitStringIndexOf( + invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); } void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -1365,7 +1347,8 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false); + GenerateVisitStringIndexOf( + invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -1454,11 +1437,33 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) 
+UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) + #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 8f1d5e1c4d..96f43a0f74 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -51,6 +51,23 @@ V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \ V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \ V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCos, kStatic, kNeedsEnvironmentOrCache) \ + V(MathSin, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \ + V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \ + V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \ + V(MathExp, kStatic, kNeedsEnvironmentOrCache) \ + V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \ + V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \ + V(MathLog, kStatic, kNeedsEnvironmentOrCache) \ + V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \ + V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \ + V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \ + V(MathTan, kStatic, kNeedsEnvironmentOrCache) \ + V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \ V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \ V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \ V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \ diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 326844526e..06fab616ad 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -43,6 +43,14 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { return codegen_->GetGraph()->GetArena(); } +inline bool 
IntrinsicCodeGeneratorMIPS::IsR2OrNewer() { + return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); +} + +inline bool IntrinsicCodeGeneratorMIPS::IsR6() { + return codegen_->GetInstructionSetFeatures().IsR6(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -168,7 +176,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -177,7 +185,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -210,7 +218,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -219,24 +227,29 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Location::OutputOverlap overlaps = 
Location::kNoOutputOverlap) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), overlaps); } -static void GenReverseBytes(LocationSummary* locations, - Primitive::Type type, - MipsAssembler* assembler, - bool isR2OrNewer) { +static void GenReverse(LocationSummary* locations, + Primitive::Type type, + bool isR2OrNewer, + bool isR6, + bool reverseBits, + MipsAssembler* assembler) { DCHECK(type == Primitive::kPrimShort || type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type != Primitive::kPrimShort || !reverseBits); if (type == Primitive::kPrimShort) { Register in = locations->InAt(0).AsRegister<Register>(); @@ -273,6 +286,30 @@ static void GenReverseBytes(LocationSummary* locations, __ And(out, out, AT); __ Or(out, out, TMP); } + if (reverseBits) { + if (isR6) { + __ Bitswap(out, out); + } else { + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 4); + __ Srl(out, out, 4); + __ And(out, out, AT); + __ Or(out, TMP, out); + __ LoadConst32(AT, 0x33333333); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 2); + __ Srl(out, out, 2); + __ And(out, out, AT); + __ Or(out, TMP, out); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 1); + __ Srl(out, out, 1); + __ And(out, out, AT); + __ Or(out, TMP, out); + } + } } else if (type == Primitive::kPrimLong) { Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -314,6 +351,46 @@ static void GenReverseBytes(LocationSummary* locations, __ And(out_lo, out_lo, AT); __ Or(out_lo, out_lo, TMP); } + if (reverseBits) { + if (isR6) { + __ Bitswap(out_hi, out_hi); + __ Bitswap(out_lo, out_lo); + } else { + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(TMP, 
out_hi, AT); + __ Sll(TMP, TMP, 4); + __ Srl(out_hi, out_hi, 4); + __ And(out_hi, out_hi, AT); + __ Or(out_hi, TMP, out_hi); + __ And(TMP, out_lo, AT); + __ Sll(TMP, TMP, 4); + __ Srl(out_lo, out_lo, 4); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, TMP, out_lo); + __ LoadConst32(AT, 0x33333333); + __ And(TMP, out_hi, AT); + __ Sll(TMP, TMP, 2); + __ Srl(out_hi, out_hi, 2); + __ And(out_hi, out_hi, AT); + __ Or(out_hi, TMP, out_hi); + __ And(TMP, out_lo, AT); + __ Sll(TMP, TMP, 2); + __ Srl(out_lo, out_lo, 2); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, TMP, out_lo); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, out_hi, AT); + __ Sll(TMP, TMP, 1); + __ Srl(out_hi, out_hi, 1); + __ And(out_hi, out_hi, AT); + __ Or(out_hi, TMP, out_hi); + __ And(TMP, out_lo, AT); + __ Sll(TMP, TMP, 1); + __ Srl(out_lo, out_lo, 1); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, TMP, out_lo); + } + } } } @@ -323,10 +400,12 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), - Primitive::kPrimInt, - GetAssembler(), - codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); + GenReverse(invoke->GetLocations(), + Primitive::kPrimInt, + IsR2OrNewer(), + IsR6(), + false, + GetAssembler()); } // long java.lang.Long.reverseBytes(long) @@ -335,10 +414,12 @@ void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), - Primitive::kPrimLong, - GetAssembler(), - codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); + GenReverse(invoke->GetLocations(), + Primitive::kPrimLong, + IsR2OrNewer(), + IsR6(), + false, + GetAssembler()); } // short java.lang.Short.reverseBytes(short) @@ -347,10 +428,397 @@ void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { } 
void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), - Primitive::kPrimShort, - GetAssembler(), - codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); + GenReverse(invoke->GetLocations(), + Primitive::kPrimShort, + IsR2OrNewer(), + IsR6(), + false, + GetAssembler()); +} + +static void GenNumberOfLeadingZeroes(LocationSummary* locations, + bool is64bit, + bool isR6, + MipsAssembler* assembler) { + Register out = locations->Out().AsRegister<Register>(); + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + if (isR6) { + __ ClzR6(AT, in_hi); + __ ClzR6(TMP, in_lo); + __ Seleqz(TMP, TMP, in_hi); + } else { + __ ClzR2(AT, in_hi); + __ ClzR2(TMP, in_lo); + __ Movn(TMP, ZERO, in_hi); + } + __ Addu(out, AT, TMP); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + + if (isR6) { + __ ClzR6(out, in); + } else { + __ ClzR2(out, in); + } + } +} + +// int java.lang.Integer.numberOfLeadingZeros(int i) +void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeroes(invoke->GetLocations(), false, IsR6(), GetAssembler()); +} + +// int java.lang.Long.numberOfLeadingZeros(long i) +void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenNumberOfLeadingZeroes(invoke->GetLocations(), true, IsR6(), GetAssembler()); +} + +static void GenNumberOfTrailingZeroes(LocationSummary* locations, + bool is64bit, + bool isR6, + bool isR2OrNewer, + MipsAssembler* assembler) { + Register out = locations->Out().AsRegister<Register>(); + Register in_lo; 
+ Register in; + + if (is64bit) { + MipsLabel done; + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + + // If in_lo is zero then count the number of trailing zeroes in in_hi; + // otherwise count the number of trailing zeroes in in_lo. + // out = in_lo ? in_lo : in_hi; + if (isR6) { + __ Seleqz(out, in_hi, in_lo); + __ Selnez(TMP, in_lo, in_lo); + __ Or(out, out, TMP); + } else { + __ Movz(out, in_hi, in_lo); + __ Movn(out, in_lo, in_lo); + } + + in = out; + } else { + in = locations->InAt(0).AsRegister<Register>(); + // Give in_lo a dummy value to keep the compiler from complaining. + // Since we only get here in the 32-bit case, this value will never + // be used. + in_lo = in; + } + + // We don't have an instruction to count the number of trailing zeroes. + // Start by flipping the bits end-for-end so we can count the number of + // leading zeroes instead. + if (isR2OrNewer) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + } else { + // MIPS32r1 + // __ Rotr(out, in, 16); + __ Sll(TMP, in, 16); + __ Srl(out, in, 16); + __ Or(out, out, TMP); + // __ Wsbh(out, out); + __ LoadConst32(AT, 0x00FF00FF); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 8); + __ Srl(out, out, 8); + __ And(out, out, AT); + __ Or(out, out, TMP); + } + + if (isR6) { + __ Bitswap(out, out); + __ ClzR6(out, out); + } else { + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 4); + __ Srl(out, out, 4); + __ And(out, out, AT); + __ Or(out, TMP, out); + __ LoadConst32(AT, 0x33333333); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 2); + __ Srl(out, out, 2); + __ And(out, out, AT); + __ Or(out, TMP, out); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 1); + __ Srl(out, out, 1); + __ And(out, out, AT); + __ Or(out, TMP, out); + __ ClzR2(out, out); + } + + if (is64bit) { + // If in_lo is zero, then we counted the number of trailing zeroes in in_hi so we must add the + //
number of trailing zeroes in in_lo (32) to get the correct final count + __ LoadConst32(TMP, 32); + if (isR6) { + __ Seleqz(TMP, TMP, in_lo); + } else { + __ Movn(TMP, ZERO, in_lo); + } + __ Addu(out, out, TMP); + } +} + +// int java.lang.Integer.numberOfTrailingZeros(int i) +void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeroes(invoke->GetLocations(), false, IsR6(), IsR2OrNewer(), GetAssembler()); +} + +// int java.lang.Long.numberOfTrailingZeros(long i) +void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + GenNumberOfTrailingZeroes(invoke->GetLocations(), true, IsR6(), IsR2OrNewer(), GetAssembler()); +} + +enum RotationDirection { + kRotateRight, + kRotateLeft, +}; + +static void GenRotate(HInvoke* invoke, + Primitive::Type type, + bool isR2OrNewer, + RotationDirection direction, + MipsAssembler* assembler) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + LocationSummary* locations = invoke->GetLocations(); + if (invoke->InputAt(1)->IsIntConstant()) { + int32_t shift = static_cast<int32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()); + if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + shift &= 0x1f; + if (direction == kRotateLeft) { + shift = (32 - shift) & 0x1F; + } + + if (isR2OrNewer) { + if ((shift != 0) || (out != in)) { + __ Rotr(out, in, shift); + } + } else { + if (shift == 0) { + if (out != in) { + __ Move(out, in); + } + } else { + __ Srl(AT, in, shift); + __ Sll(out, in, 32 - shift); + __ Or(out, out, AT); 
+ } + } + } else { // Primitive::kPrimLong + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + shift &= 0x3f; + if (direction == kRotateLeft) { + shift = (64 - shift) & 0x3F; + } + + if (shift == 0) { + __ Move(out_lo, in_lo); + __ Move(out_hi, in_hi); + } else if (shift == 32) { + __ Move(out_lo, in_hi); + __ Move(out_hi, in_lo); + } else if (shift < 32) { + __ Srl(AT, in_lo, shift); + __ Sll(out_lo, in_hi, 32 - shift); + __ Or(out_lo, out_lo, AT); + __ Srl(AT, in_hi, shift); + __ Sll(out_hi, in_lo, 32 - shift); + __ Or(out_hi, out_hi, AT); + } else { + __ Sll(AT, in_lo, 64 - shift); + __ Srl(out_lo, in_hi, shift - 32); + __ Or(out_lo, out_lo, AT); + __ Sll(AT, in_hi, 64 - shift); + __ Srl(out_hi, in_lo, shift - 32); + __ Or(out_hi, out_hi, AT); + } + } + } else { // !invoke->InputAt(1)->IsIntConstant() + Register shamt = locations->InAt(1).AsRegister<Register>(); + if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + if (direction == kRotateRight) { + __ Rotrv(out, in, shamt); + } else { + // negu tmp, shamt + __ Subu(TMP, ZERO, shamt); + __ Rotrv(out, in, TMP); + } + } else { + if (direction == kRotateRight) { + __ Srlv(AT, in, shamt); + __ Subu(TMP, ZERO, shamt); + __ Sllv(out, in, TMP); + __ Or(out, out, AT); + } else { + __ Sllv(AT, in, shamt); + __ Subu(TMP, ZERO, shamt); + __ Srlv(out, in, TMP); + __ Or(out, out, AT); + } + } + } else { // Primitive::kPrimLong + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = 
locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel done; + + if (direction == kRotateRight) { + __ Nor(TMP, ZERO, shamt); + __ Srlv(AT, in_lo, shamt); + __ Sll(out_lo, in_hi, 1); + __ Sllv(out_lo, out_lo, TMP); + __ Or(out_lo, out_lo, AT); + __ Srlv(AT, in_hi, shamt); + __ Sll(out_hi, in_lo, 1); + __ Sllv(out_hi, out_hi, TMP); + __ Or(out_hi, out_hi, AT); + } else { + __ Nor(TMP, ZERO, shamt); + __ Sllv(AT, in_lo, shamt); + __ Srl(out_lo, in_hi, 1); + __ Srlv(out_lo, out_lo, TMP); + __ Or(out_lo, out_lo, AT); + __ Sllv(AT, in_hi, shamt); + __ Srl(out_hi, in_lo, 1); + __ Srlv(out_hi, out_hi, TMP); + __ Or(out_hi, out_hi, AT); + } + + __ Andi(TMP, shamt, 32); + __ Beqz(TMP, &done); + __ Move(TMP, out_hi); + __ Move(out_hi, out_lo); + __ Move(out_lo, TMP); + + __ Bind(&done); + } + } +} + +// int java.lang.Integer.rotateRight(int i, int distance) +void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateRight(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateRight(HInvoke* invoke) { + GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateRight, GetAssembler()); +} + +// long java.lang.Long.rotateRight(long i, int distance) +void IntrinsicLocationsBuilderMIPS::VisitLongRotateRight(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongRotateRight(HInvoke* invoke) { + GenRotate(invoke, 
Primitive::kPrimLong, IsR2OrNewer(), kRotateRight, GetAssembler()); +} + +// int java.lang.Integer.rotateLeft(int i, int distance) +void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateLeft(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateLeft(HInvoke* invoke) { + GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateLeft, GetAssembler()); +} + +// long java.lang.Long.rotateLeft(long i, int distance) +void IntrinsicLocationsBuilderMIPS::VisitLongRotateLeft(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongRotateLeft(HInvoke* invoke) { + GenRotate(invoke, Primitive::kPrimLong, IsR2OrNewer(), kRotateLeft, GetAssembler()); +} + +// int java.lang.Integer.reverse(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), + Primitive::kPrimInt, + IsR2OrNewer(), + IsR6(), + true, + GetAssembler()); +} + +// long java.lang.Long.reverse(long) +void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), + Primitive::kPrimLong, + IsR2OrNewer(), + 
IsR6(), + true, + GetAssembler()); } // boolean java.lang.String.equals(Object anObject) @@ -463,10 +931,6 @@ void IntrinsicLocationsBuilderMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUS void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) @@ -519,18 +983,29 @@ UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars) UNIMPLEMENTED_INTRINSIC(StringNewStringFromString) -UNIMPLEMENTED_INTRINSIC(LongRotateLeft) -UNIMPLEMENTED_INTRINSIC(LongRotateRight) -UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) -UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) -UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) -UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 
c71b3c68b7..19ad5255d5 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -67,6 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool IsR2OrNewer(void); + bool IsR6(void); + private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 3654159f83..8b45ea7c4f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -115,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -162,7 +162,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -171,7 +171,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -199,7 +199,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -208,7 +208,7 @@ void 
IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -290,7 +290,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -299,7 +299,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -327,7 +327,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) @@ -336,7 +336,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } static void GenRotateRight(HInvoke* 
invoke, @@ -525,7 +525,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } // float java.lang.Math.abs(float) @@ -534,7 +534,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { @@ -566,7 +566,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } // long java.lang.Math.abs(long) @@ -575,7 +575,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } static void GenMinMaxFP(LocationSummary* locations, @@ -616,7 +616,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler()); } // float java.lang.Math.min(float, float) @@ -625,7 +625,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - 
GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler()); } // double java.lang.Math.max(double, double) @@ -634,7 +634,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler()); } // float java.lang.Math.max(float, float) @@ -643,7 +643,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -713,7 +713,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); } // long java.lang.Math.min(long, long) @@ -722,7 +722,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); } // int java.lang.Math.max(int, int) @@ -731,7 +731,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); } // long 
java.lang.Math.max(long, long) @@ -740,7 +740,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); } // double java.lang.Math.sqrt(double) @@ -806,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1045,7 +1045,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) @@ -1054,7 +1054,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) @@ -1063,7 +1063,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); } // long sun.misc.Unsafe.getLongVolatile(Object o, long offset) @@ -1072,7 +1072,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); + GenUnsafeGet(invoke, 
Primitive::kPrimLong, /* is_volatile */ true, codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) @@ -1081,7 +1081,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) @@ -1090,7 +1090,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invo } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { @@ -1151,7 +1151,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) @@ -1160,7 +1164,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) @@ -1169,7 +1177,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* 
invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimInt, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } // void sun.misc.Unsafe.putObject(Object o, long offset, Object x) @@ -1178,7 +1190,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) @@ -1187,7 +1203,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) @@ -1196,7 +1216,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invo } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimNot, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } // void sun.misc.Unsafe.putLong(Object o, long offset, long x) @@ -1205,7 +1229,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + 
Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ false, + codegen_); } // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) @@ -1214,7 +1242,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ false, + /* is_ordered */ true, + codegen_); } // void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x) @@ -1223,7 +1255,11 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); + GenUnsafePut(invoke->GetLocations(), + Primitive::kPrimLong, + /* is_volatile */ true, + /* is_ordered */ false, + codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) { @@ -1256,13 +1292,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); if (type == Primitive::kPrimLong) { __ Lld(out, TMP); } else { + // Note: We will need a read barrier here, when read barrier + // support is added to the MIPS64 back end. 
__ Ll(out, TMP); } __ Dsubu(out, out, expected); // If we didn't get the 'expected' @@ -1418,10 +1456,10 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); - Label loop; - Label end; - Label return_true; - Label return_false; + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; // Get offsets of count, value, and class fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); @@ -1485,7 +1523,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadConst64(out, 1); - __ B(&end); + __ Bc(&end); // Return false and exit the function. __ Bind(&return_false); @@ -1514,7 +1552,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. 
slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } @@ -1565,7 +1603,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); } // int java.lang.String.indexOf(int ch, int fromIndex) @@ -1584,7 +1622,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); + GenerateStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); } // java.lang.String.String(byte[] bytes) @@ -1693,6 +1732,24 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MathCos) +UNIMPLEMENTED_INTRINSIC(MathSin) +UNIMPLEMENTED_INTRINSIC(MathAcos) +UNIMPLEMENTED_INTRINSIC(MathAsin) +UNIMPLEMENTED_INTRINSIC(MathAtan) +UNIMPLEMENTED_INTRINSIC(MathAtan2) +UNIMPLEMENTED_INTRINSIC(MathCbrt) +UNIMPLEMENTED_INTRINSIC(MathCosh) +UNIMPLEMENTED_INTRINSIC(MathExp) +UNIMPLEMENTED_INTRINSIC(MathExpm1) +UNIMPLEMENTED_INTRINSIC(MathHypot) +UNIMPLEMENTED_INTRINSIC(MathLog) +UNIMPLEMENTED_INTRINSIC(MathLog10) +UNIMPLEMENTED_INTRINSIC(MathNextAfter) +UNIMPLEMENTED_INTRINSIC(MathSinh) +UNIMPLEMENTED_INTRINSIC(MathTan) +UNIMPLEMENTED_INTRINSIC(MathTanh) + #undef UNIMPLEMENTED_INTRINSIC #undef __ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 371588fc47..80190629ee 100644 --- 
a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -138,31 +138,31 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* } void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke, true); + CreateFPToIntLocations(arena_, invoke, /* is64bit */ true); } void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke, true); + CreateIntToFPLocations(arena_, invoke, /* is64bit */ true); } void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke, false); + CreateFPToIntLocations(arena_, invoke, /* is64bit */ false); } void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke, false); + CreateIntToFPLocations(arena_, invoke, /* is64bit */ false); } void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -298,7 +298,7 @@ void 
IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { @@ -306,7 +306,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) { @@ -490,7 +490,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -498,7 +498,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -506,7 +506,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -514,7 +514,7 @@ 
void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler()); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -597,7 +597,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { @@ -605,7 +605,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { @@ -613,7 +613,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { @@ -621,7 +621,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* 
invoke) { @@ -788,6 +788,195 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&done); } +static void CreateFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); +} + +static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->WillCall()); + DCHECK(invoke->IsInvokeStaticOrDirect()); + X86Assembler* assembler = codegen->GetAssembler(); + + // We need some place to pass the parameters. + __ subl(ESP, Immediate(16)); + __ cfi().AdjustCFAOffset(16); + + // Pass the parameters at the bottom of the stack. + __ movsd(Address(ESP, 0), XMM0); + + // If we have a second parameter, pass it next. + if (invoke->GetNumberOfArguments() == 2) { + __ movsd(Address(ESP, 8), XMM1); + } + + // Now do the actual call. + __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(entry))); + + // Extract the return value from the FP stack. + __ fstpl(Address(ESP, 0)); + __ movsd(XMM0, Address(ESP, 0)); + + // And clean up the stack. 
+ __ addl(ESP, Immediate(16)); + __ cfi().AdjustCFAOffset(-16); + + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCosh); +} + +void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, 
kQuickExp); +} + +void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTanh); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); +} + +void 
IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickNextAfter); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1265,19 +1454,20 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, true); + CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true); } void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); } void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, false); + CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false); } void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); + GenerateStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ 
-1600,12 +1790,27 @@ static void GenUnsafeGet(HInvoke* invoke, Location output_loc = locations->Out(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { Register output = output_loc.AsRegister<Register>(); __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + break; + } + + case Primitive::kPrimNot: { + Register output = output_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + codegen->GenerateReadBarrierSlow( + invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + } + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ MaybeUnpoisonHeapReference(output); } break; } @@ -1633,8 +1838,10 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke, - bool is_long, bool is_volatile) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type, + bool is_volatile) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1646,7 +1853,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - if (is_long) { + if (type == Primitive::kPrimLong) { if (is_volatile) { // Need to use XMM to read volatile. 
locations->AddTemp(Location::RequiresFpuRegister()); @@ -1657,45 +1864,50 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke } else { locations->SetOut(Location::RequiresRegister()); } + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, false, false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, false, true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, false, false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, true, true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, false, false); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, false, true); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true); } void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - 
GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); } @@ -1722,31 +1934,40 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { - 
CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true); + CreateIntIntIntIntToVoidPlusTempsLocations( + arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true); } // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 @@ 
-1784,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations, } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -1798,31 +2019,31 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void 
IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, @@ -1864,6 +2085,17 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -1915,6 +2147,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). 
+ DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -1924,11 +2163,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to @@ -2267,56 +2503,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenTrailingZeros(assembler, invoke, /* is_long */ true); } -static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL or a constant. 
- locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) { - LocationSummary* locations = invoke->GetLocations(); - Register first_reg = locations->InAt(0).AsRegister<Register>(); - Location second = locations->InAt(1); - - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - if (is_left) { - __ roll(first_reg, second_reg); - } else { - __ rorl(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (is_left) { - __ roll(first_reg, imm); - } else { - __ rorl(first_reg, imm); - } - } -} - -void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) { - X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_left */ false); -} - // Unimplemented intrinsics. 
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -2327,6 +2513,8 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 2d9f01b821..aa1c109738 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -115,10 +115,10 @@ void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -129,10 +129,10 @@ void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -230,7 +230,7 @@ void 
IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -238,7 +238,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -277,7 +277,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { @@ -285,7 +285,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); + GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } static void GenMinMaxFP(LocationSummary* locations, @@ -388,7 +388,8 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); + GenMinMaxFP( + invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -396,7 +397,8 @@ void 
IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); + GenMinMaxFP( + invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -404,7 +406,8 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); + GenMinMaxFP( + invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -412,7 +415,8 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); + GenMinMaxFP( + invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -461,7 +465,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { @@ -469,7 +473,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), true, true, GetAssembler()); + 
GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { @@ -477,7 +481,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { @@ -485,7 +489,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -690,7 +694,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ j(kUnordered, &nan); // output = double-to-long-truncate(input) - __ cvttsd2si(out, inPlusPointFive, true); + __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true); __ jmp(&done); __ Bind(&nan); @@ -699,6 +703,188 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&done); } +static void CreateFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + + // We have to ensure that the native code doesn't clobber the XMM registers which are + // non-volatile for ART, but volatile for Native calls. 
This will ensure that they are + // saved in the prologue and properly restored. + for (auto fp_reg : non_volatile_xmm_regs) { + locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); + } +} + +static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen, + QuickEntrypointEnum entry) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->WillCall()); + DCHECK(invoke->IsInvokeStaticOrDirect()); + X86_64Assembler* assembler = codegen->GetAssembler(); + + __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCos); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSin); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAcos); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAsin); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void 
IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCbrt); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickCosh); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExp); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickExpm1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickLog10); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickSinh); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTan); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* 
invoke) { + GenFPToFPCall(invoke, codegen_, kQuickTanh); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + + // We have to ensure that the native code doesn't clobber the XMM registers which are + // non-volatile for ART, but volatile for Native calls. This will ensure that they are + // saved in the prologue and properly restored. + for (auto fp_reg : non_volatile_xmm_regs) { + locations->AddTemp(Location::FpuRegisterLocation(fp_reg)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickAtan2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickHypot); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickNextAfter); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1152,7 +1338,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { temp2, dest, CpuRegister(kNoRegister), - false); + /* value_can_be_null */ false); __ Bind(slow_path->GetExitLabel()); } @@ -1180,8 +1366,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute( - QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true)); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), + /* no_rip */ true)); __ Bind(slow_path->GetExitLabel()); } @@ -1372,7 +1558,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Ensure we have a start index >= 0; __ xorl(counter, counter); __ cmpl(start_index, Immediate(0)); - __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough. + __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. // Move to the start of the string: string_obj + value_offset + 2 * start_index. 
__ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); @@ -1409,19 +1595,20 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, true); + CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, false); + CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); + GenerateStringIndexOf( + invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -1446,8 +1633,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute( - QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true)); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), + /* no_rip */ true)); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1466,8 +1653,8 @@ void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* inv void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); - __ gs()->call(Address::Absolute( - 
QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true)); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), + /* no_rip */ true)); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1490,8 +1677,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); - __ gs()->call(Address::Absolute( - QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true)); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), + /* no_rip */ true)); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -1715,7 +1902,8 @@ void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); - GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); + GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), + /* no_rip */ true)); } static void GenUnsafeGet(HInvoke* invoke, @@ -1729,16 +1917,30 @@ static void GenUnsafeGet(HInvoke* invoke, Location offset_loc = locations->InAt(2); CpuRegister offset = offset_loc.AsRegister<CpuRegister>(); Location output_loc = locations->Out(); - CpuRegister output = locations->Out().AsRegister<CpuRegister>(); + CpuRegister output = output_loc.AsRegister<CpuRegister>(); switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimNot: __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + break; + + case Primitive::kPrimNot: { + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = 
locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + codegen->GenerateReadBarrierSlow( + invoke, output_loc, output_loc, base_loc, 0U, offset_loc); + } + } else { + __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + __ MaybeUnpoisonHeapReference(output); } break; + } case Primitive::kPrimLong: __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); @@ -1750,7 +1952,9 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -1763,45 +1967,50 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - 
GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); + GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); } @@ -1871,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -1885,31 +2094,31 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - 
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, @@ -1941,6 +2150,17 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. 
+ if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -1991,6 +2211,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2000,11 +2227,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ setcc(kZero, out); __ movzxb(out, out); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need @@ -2289,92 +2513,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(assembler, invoke, /* is_long */ true); } -static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL or a constant. 
- locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) { - LocationSummary* locations = invoke->GetLocations(); - CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); - Location second = locations->InAt(1); - - if (is_long) { - if (second.IsRegister()) { - CpuRegister second_reg = second.AsRegister<CpuRegister>(); - if (is_left) { - __ rolq(first_reg, second_reg); - } else { - __ rorq(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue); - if (is_left) { - __ rolq(first_reg, imm); - } else { - __ rorq(first_reg, imm); - } - } - } else { - if (second.IsRegister()) { - CpuRegister second_reg = second.AsRegister<CpuRegister>(); - if (is_left) { - __ roll(first_reg, second_reg); - } else { - __ rorl(first_reg, second_reg); - } - } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (is_left) { - __ roll(first_reg, imm); - } else { - __ rorl(first_reg, imm); - } - } - } -} - -void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false); -} - -void 
IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true); -} - -void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) { - CreateRotateLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) { - X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler()); - GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false); -} - // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -2384,6 +2522,10 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE } UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) #undef UNIMPLEMENTED_INTRINSIC diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index c38bbe3477..02befc011a 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -121,6 +121,8 @@ void LICM::Run() { // phi in it. if (instruction->NeedsEnvironment()) { UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info); + } else { + DCHECK(!instruction->HasEnvironment()); } instruction->MoveBefore(pre_header->GetLastInstruction()); } else if (instruction->CanThrow()) { diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 2bb769a430..9ad003cc83 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -107,7 +107,7 @@ TEST_F(LICMTest, FieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with different types. 
- NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, Primitive::kPrimLong, MemberOffset(10), @@ -134,7 +134,7 @@ TEST_F(LICMTest, NoFieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with same types. - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, Primitive::kPrimLong, MemberOffset(10), diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 5b89cfef5a..727f2bb717 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,10 +119,16 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index) { + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. + value_killed_by_loop_side_effects_ = false; + } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -139,11 +145,22 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. 
HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -335,16 +352,24 @@ class HeapLocationCollector : public HGraphVisitor { return true; } - ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* ref) { - ReferenceInfo* ref_info = FindReferenceInfoOf(ref); + ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) { + ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); if (ref_info == nullptr) { size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(ref, pos); + ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); ref_info_array_.push_back(ref_info); } return ref_info; } + void CreateReferenceInfoForReferenceType(HInstruction* instruction) { + if (instruction->GetType() != Primitive::kPrimNot) { + return; + } + DCHECK(FindReferenceInfoOf(instruction) == nullptr); + GetOrCreateReferenceInfo(instruction); + } + HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, size_t offset, HInstruction* index, @@ -362,13 +387,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ 
-378,15 +403,20 @@ class HeapLocationCollector : public HGraphVisitor { void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; + if (instruction->GetBlock()->GetLoopInformation() != nullptr) { + location->SetValueKilledByLoopSideEffects(true); + } } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + CreateReferenceInfoForReferenceType(instruction); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { @@ -399,6 +429,7 @@ class HeapLocationCollector : public HGraphVisitor { void VisitArrayGet(HArrayGet* instruction) OVERRIDE { VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + CreateReferenceInfoForReferenceType(instruction); } void VisitArraySet(HArraySet* instruction) OVERRIDE { @@ -408,7 +439,23 @@ class HeapLocationCollector : public HGraphVisitor { void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. 
- GetOrCreateReferenceInfo(new_instance); + CreateReferenceInfoForReferenceType(new_instance); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); + } + + void VisitParameterValue(HParameterValue* instruction) OVERRIDE { + CreateReferenceInfoForReferenceType(instruction); } void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE { @@ -538,23 +585,26 @@ class LSEVisitor : public HGraphVisitor { HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); ArenaVector<HInstruction*>& pre_header_heap_values = heap_values_for_[pre_header->GetBlockId()]; + // Inherit the values from pre-header. + for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + // We do a single pass in reverse post order. For loops, use the side effects as a hint // to see if the heap values should be killed. if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - for (size_t i = 0; i < pre_header_heap_values.size(); i++) { - // heap value is killed by loop side effects, need to keep the last store. - KeepIfIsStore(pre_header_heap_values[i]); - } - if (kIsDebugBuild) { - // heap_values should all be kUnknownHeapValue that it is inited with. - for (size_t i = 0; i < heap_values.size(); i++) { - DCHECK_EQ(heap_values[i], kUnknownHeapValue); - } - } - } else { - // Inherit the values from pre-header. 
for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; + HeapLocation* location = heap_location_collector_.GetHeapLocation(i); + ReferenceInfo* ref_info = location->GetReferenceInfo(); + if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) { + // heap value is killed by loop side effects (stored into directly, or due to + // aliasing). + KeepIfIsStore(pre_header_heap_values[i]); + heap_values[i] = kUnknownHeapValue; + } else { + // A singleton's field that's not stored into inside a loop is invariant throughout + // the loop. + } } } } @@ -628,6 +678,16 @@ class LSEVisitor : public HGraphVisitor { } } + static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) { + return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) || + (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt); + } + + static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) { + return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) || + (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong); + } + void VisitGetLocation(HInstruction* instruction, HInstruction* ref, size_t offset, @@ -659,7 +719,8 @@ class LSEVisitor : public HGraphVisitor { if ((heap_value != kUnknownHeapValue) && // Keep the load due to possible I/F, J/D array aliasing. // See b/22538329 for details. - (heap_value->GetType() == instruction->GetType())) { + !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) && + !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) { removed_loads_.push_back(instruction); substitute_instructions_for_loads_.push_back(heap_value); TryRemovingNullCheck(instruction); @@ -724,8 +785,11 @@ class LSEVisitor : public HGraphVisitor { if (loop_info != nullptr) { // instruction is a store in the loop so the loop must does write. 
DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + // If it's a singleton, IsValueKilledByLoopSideEffects() must be true. + DCHECK(!ref_info->IsSingleton() || + heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects()); - if (loop_info->IsLoopInvariant(original_ref, false)) { + if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); // Keep the store since its value may be needed at the loop header. possibly_redundant = false; @@ -933,8 +997,9 @@ class LSEVisitor : public HGraphVisitor { }; void LoadStoreElimination::Run() { - if (graph_->IsDebuggable()) { + if (graph_->IsDebuggable() || graph_->HasTryCatch()) { // Debugger may set heap values or trigger deoptimization of callers. + // Try/catch support not implemented yet. // Skip this optimization. return; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 890598d687..a37298c76e 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -587,15 +587,8 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } -bool HLoopInformation::IsLoopInvariant(HInstruction* instruction, bool must_dominate) const { - HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation(); - if (other_loop != this && (other_loop == nullptr || !other_loop->IsIn(*this))) { - if (must_dominate) { - return instruction->GetBlock()->Dominates(GetHeader()); - } - return true; - } - return false; +bool HLoopInformation::IsDefinedOutOfTheLoop(HInstruction* instruction) const { + return !blocks_.IsBitSet(instruction->GetBlock()->GetBlockId()); } size_t HLoopInformation::GetLifetimeEnd() const { @@ -784,6 +777,10 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); } +HInstruction::InstructionKind 
HInstruction::GetKind() const { + return GetKindInternal(); +} + HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -967,7 +964,7 @@ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ } -FOR_EACH_INSTRUCTION(DEFINE_ACCEPT) +FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT) #undef DEFINE_ACCEPT @@ -1177,6 +1174,59 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } } +void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { + DCHECK(!CanThrow()); + DCHECK(!HasSideEffects()); + DCHECK(!HasEnvironmentUses()); + DCHECK(HasNonEnvironmentUses()); + DCHECK(!IsPhi()); // Makes no sense for Phi. + DCHECK_EQ(InputCount(), 0u); + + // Find the target block. + HUseIterator<HInstruction*> uses_it(GetUses()); + HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock(); + uses_it.Advance(); + while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) { + uses_it.Advance(); + } + if (!uses_it.Done()) { + // This instruction has uses in two or more blocks. Find the common dominator. + CommonDominator finder(target_block); + for (; !uses_it.Done(); uses_it.Advance()) { + finder.Update(uses_it.Current()->GetUser()->GetBlock()); + } + target_block = finder.Get(); + DCHECK(target_block != nullptr); + } + // Move to the first dominator not in a loop. + while (target_block->IsInLoop()) { + target_block = target_block->GetDominator(); + DCHECK(target_block != nullptr); + } + + // Find insertion position. 
+ HInstruction* insert_pos = nullptr; + for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) { + if (uses_it2.Current()->GetUser()->GetBlock() == target_block && + (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) { + insert_pos = uses_it2.Current()->GetUser(); + } + } + if (insert_pos == nullptr) { + // No user in `target_block`, insert before the control flow instruction. + insert_pos = target_block->GetLastInstruction(); + DCHECK(insert_pos->IsControlFlow()); + // Avoid splitting HCondition from HIf to prevent unnecessary materialization. + if (insert_pos->IsIf()) { + HInstruction* if_input = insert_pos->AsIf()->InputAt(0); + if (if_input == insert_pos->GetPrevious()) { + insert_pos = if_input; + } + } + } + MoveBefore(insert_pos); +} + HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); @@ -1414,6 +1464,24 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } +// Should be called on instructions in a dead block in post order. This method +// assumes `insn` has been removed from all users with the exception of catch +// phis because of missing exceptional edges in the graph. It removes the +// instruction from catch phi uses, together with inputs of other catch phis in +// the catch block at the same index, as these must be dead too. 
+static void RemoveUsesOfDeadInstruction(HInstruction* insn) { + DCHECK(!insn->HasEnvironmentUses()); + while (insn->HasNonEnvironmentUses()) { + HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst(); + size_t use_index = use->GetIndex(); + HBasicBlock* user_block = use->GetUser()->GetBlock(); + DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock()); + for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(use_index); + } + } +} + void HBasicBlock::DisconnectAndDelete() { // Dominators must be removed after all the blocks they dominate. This way // a loop header is removed last, a requirement for correct loop information @@ -1516,21 +1584,13 @@ void HBasicBlock::DisconnectAndDelete() { // graph will always remain consistent. for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { HInstruction* insn = it.Current(); - while (insn->HasUses()) { - DCHECK(IsTryBlock()); - HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst(); - size_t use_index = use->GetIndex(); - HBasicBlock* user_block = use->GetUser()->GetBlock(); - DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock()); - for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(use_index); - } - } - + RemoveUsesOfDeadInstruction(insn); RemoveInstruction(insn); } for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { - RemovePhi(it.Current()->AsPhi()); + HPhi* insn = it.Current()->AsPhi(); + RemoveUsesOfDeadInstruction(insn); + RemovePhi(insn); } // Disconnect from the dominator. 
@@ -1890,7 +1950,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { * | * if_block * / \ - * dummy_block deopt_block + * true_block false_block * \ / * new_pre_header * | @@ -1898,62 +1958,73 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { */ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); + HBasicBlock* old_pre_header = header->GetDominator(); - // Need this to avoid critical edge. + // Need extra block to avoid critical edge. HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - // Need this to avoid critical edge. - HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc()); HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(if_block); - AddBlock(dummy_block); - AddBlock(deopt_block); + AddBlock(true_block); + AddBlock(false_block); AddBlock(new_pre_header); - header->ReplacePredecessor(pre_header, new_pre_header); - pre_header->successors_.clear(); - pre_header->dominated_blocks_.clear(); - - pre_header->AddSuccessor(if_block); - if_block->AddSuccessor(dummy_block); // True successor - if_block->AddSuccessor(deopt_block); // False successor - dummy_block->AddSuccessor(new_pre_header); - deopt_block->AddSuccessor(new_pre_header); - - pre_header->dominated_blocks_.push_back(if_block); - if_block->SetDominator(pre_header); - if_block->dominated_blocks_.push_back(dummy_block); - dummy_block->SetDominator(if_block); - if_block->dominated_blocks_.push_back(deopt_block); - deopt_block->SetDominator(if_block); + header->ReplacePredecessor(old_pre_header, new_pre_header); + 
old_pre_header->successors_.clear(); + old_pre_header->dominated_blocks_.clear(); + + old_pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(true_block); // True successor + if_block->AddSuccessor(false_block); // False successor + true_block->AddSuccessor(new_pre_header); + false_block->AddSuccessor(new_pre_header); + + old_pre_header->dominated_blocks_.push_back(if_block); + if_block->SetDominator(old_pre_header); + if_block->dominated_blocks_.push_back(true_block); + true_block->SetDominator(if_block); + if_block->dominated_blocks_.push_back(false_block); + false_block->SetDominator(if_block); if_block->dominated_blocks_.push_back(new_pre_header); new_pre_header->SetDominator(if_block); new_pre_header->dominated_blocks_.push_back(header); header->SetDominator(new_pre_header); + // Fix reverse post order. size_t index_of_header = IndexOfElement(reverse_post_order_, header); MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); reverse_post_order_[index_of_header++] = if_block; - reverse_post_order_[index_of_header++] = dummy_block; - reverse_post_order_[index_of_header++] = deopt_block; + reverse_post_order_[index_of_header++] = true_block; + reverse_post_order_[index_of_header++] = false_block; reverse_post_order_[index_of_header++] = new_pre_header; - HLoopInformation* info = pre_header->GetLoopInformation(); - if (info != nullptr) { - if_block->SetLoopInformation(info); - dummy_block->SetLoopInformation(info); - deopt_block->SetLoopInformation(info); - new_pre_header->SetLoopInformation(info); - for (HLoopInformationOutwardIterator loop_it(*pre_header); + // Fix loop information. + HLoopInformation* loop_info = old_pre_header->GetLoopInformation(); + if (loop_info != nullptr) { + if_block->SetLoopInformation(loop_info); + true_block->SetLoopInformation(loop_info); + false_block->SetLoopInformation(loop_info); + new_pre_header->SetLoopInformation(loop_info); + // Add blocks to all enveloping loops. 
+ for (HLoopInformationOutwardIterator loop_it(*old_pre_header); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(if_block); - loop_it.Current()->Add(dummy_block); - loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(true_block); + loop_it.Current()->Add(false_block); loop_it.Current()->Add(new_pre_header); } } + + // Fix try/catch information. + TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock() + ? old_pre_header->GetTryCatchInformation() + : nullptr; + if_block->SetTryCatchInformation(try_catch_info); + true_block->SetTryCatchInformation(try_catch_info); + false_block->SetTryCatchInformation(try_catch_info); + new_pre_header->SetTryCatchInformation(try_catch_info); } void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { @@ -2068,6 +2139,26 @@ void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { } } +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: + return os << "string_init"; + case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: + return os << "recursive"; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: + return os << "direct"; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: + return os << "direct_fixup"; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: + return os << "dex_cache_pc_relative"; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: + return os << "dex_cache_via_method"; + default: + LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) { switch (rhs) { case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit: @@ -2077,7 +2168,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckReq case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone: return 
os << "none"; default: - return os << "unknown:" << static_cast<int>(rhs); + LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs); + UNREACHABLE(); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3b5c384c84..db3e969afc 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -371,6 +371,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + ArtMethod* GetArtMethod() const { return art_method_; } + void SetArtMethod(ArtMethod* method) { art_method_ = method; } + // Returns an instruction with the opposite boolean value from 'cond'. // The instruction has been inserted into the graph, either as a constant, or // before cursor. @@ -479,6 +482,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HCurrentMethod* cached_current_method_; + // The ArtMethod this graph is for. Note that for AOT, it may be null, + // for example for methods whose declaring class could not be resolved + // (such as when the superclass could not be found). + ArtMethod* art_method_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); @@ -556,11 +564,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { // Note that `other` *must* be populated before entering this function. bool IsIn(const HLoopInformation& other) const; - // Returns true if instruction is not defined within this loop or any loop nested inside - // this loop. If must_dominate is set, only definitions that actually dominate the loop - // header can be invariant. Otherwise, any definition outside the loop, including - // definitions that appear after the loop, is invariant. 
- bool IsLoopInvariant(HInstruction* instruction, bool must_dominate) const; + // Returns true if instruction is not defined within this loop. + bool IsDefinedOutOfTheLoop(HInstruction* instruction) const; const ArenaBitVector& GetBlocks() const { return blocks_; } @@ -1029,7 +1034,6 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ - M(Condition, BinaryOperation) \ M(CurrentMethod, Instruction) \ M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ @@ -1062,6 +1066,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(MemoryBarrier, Instruction) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ + M(NativeDebugInfo, Instruction) \ M(Neg, UnaryOperation) \ M(NewArray, Instruction) \ M(NewInstance, Instruction) \ @@ -1077,6 +1082,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Rem, BinaryOperation) \ M(Return, Instruction) \ M(ReturnVoid, Instruction) \ + M(Ror, BinaryOperation) \ M(Shl, BinaryOperation) \ M(Shr, BinaryOperation) \ M(StaticFieldGet, Instruction) \ @@ -1095,13 +1101,20 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + M(ArmDexCacheArraysBase, Instruction) +#endif #ifndef ART_ENABLE_CODEGEN_arm64 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) \ + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1128,27 +1141,34 @@ class HLoopInformationOutwardIterator : public ValueObject { FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) -#define 
FOR_EACH_INSTRUCTION(M) \ - FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_ABSTRACT_INSTRUCTION(M) \ + M(Condition, BinaryOperation) \ M(Constant, Instruction) \ M(UnaryOperation, Instruction) \ M(BinaryOperation, Instruction) \ M(Invoke, Instruction) +#define FOR_EACH_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_ABSTRACT_INSTRUCTION(M) + #define FORWARD_DECLARATION(type, super) class H##type; FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) #undef FORWARD_DECLARATION #define DECLARE_INSTRUCTION(type) \ - InstructionKind GetKind() const OVERRIDE { return k##type; } \ + InstructionKind GetKindInternal() const OVERRIDE { return k##type; } \ const char* DebugName() const OVERRIDE { return #type; } \ - const H##type* As##type() const OVERRIDE { return this; } \ - H##type* As##type() OVERRIDE { return this; } \ bool InstructionTypeEquals(HInstruction* other) const OVERRIDE { \ return other->Is##type(); \ } \ void Accept(HGraphVisitor* visitor) OVERRIDE +#define DECLARE_ABSTRACT_INSTRUCTION(type) \ + bool Is##type() const { return As##type() != nullptr; } \ + const H##type* As##type() const { return this; } \ + H##type* As##type() { return this; } + template <typename T> class HUseList; template <typename T> @@ -1950,12 +1970,27 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Move `this` instruction before `cursor`. void MoveBefore(HInstruction* cursor); + // Move `this` before its first user and out of any loops. If there is no + // out-of-loop user that dominates all other users, move the instruction + // to the end of the out-of-loop common dominator of the user's blocks. + // + // This can be used only on non-throwing instructions with no side effects that + // have at least one use but no environment uses. 
+ void MoveBeforeFirstUserAndOutOfLoops(); + +#define INSTRUCTION_TYPE_CHECK(type, super) \ + bool Is##type() const; \ + const H##type* As##type() const; \ + H##type* As##type(); + + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + #define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ virtual H##type* As##type() { return nullptr; } - - FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) + FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK // Returns whether the instruction can be moved within the graph. @@ -1978,7 +2013,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // 2) Their inputs are identical. bool Equals(HInstruction* other) const; - virtual InstructionKind GetKind() const = 0; + // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744) + // is adopted and implemented by our C++ compiler(s). For now, we need to hide + // the virtual function because the __attribute__((__pure__)) doesn't really + // apply the strong requirement for virtual functions, preventing optimizations. + InstructionKind GetKind() const PURE; + virtual InstructionKind GetKindInternal() const = 0; virtual size_t ComputeHashCode() const { size_t result = GetKind(); @@ -2276,7 +2316,7 @@ class HConstant : public HExpression<0> { virtual uint64_t GetValueAsUint64() const = 0; - DECLARE_INSTRUCTION(Constant); + DECLARE_ABSTRACT_INSTRUCTION(Constant); private: DISALLOW_COPY_AND_ASSIGN(HConstant); @@ -2447,11 +2487,15 @@ class HTryBoundary : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition.
class HDeoptimize : public HTemplateInstruction<1> { public: - explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc) + HDeoptimize(HInstruction* cond, uint32_t dex_pc) : HTemplateInstruction(SideEffects::None(), dex_pc) { SetRawInputAt(0, cond); } + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } @@ -2533,7 +2577,7 @@ class HUnaryOperation : public HExpression<1> { virtual HConstant* Evaluate(HIntConstant* x) const = 0; virtual HConstant* Evaluate(HLongConstant* x) const = 0; - DECLARE_INSTRUCTION(UnaryOperation); + DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation); private: DISALLOW_COPY_AND_ASSIGN(HUnaryOperation); @@ -2626,7 +2670,7 @@ class HBinaryOperation : public HExpression<2> { // one. Otherwise it returns null. HInstruction* GetLeastConstantLeft() const; - DECLARE_INSTRUCTION(BinaryOperation); + DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation); private: DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); @@ -2654,7 +2698,7 @@ class HCondition : public HBinaryOperation { // `instruction`, and disregard moves in between. 
bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; - DECLARE_INSTRUCTION(Condition); + DECLARE_ABSTRACT_INSTRUCTION(Condition); virtual IfCondition GetCondition() const = 0; @@ -3263,7 +3307,7 @@ class HInvoke : public HInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } - DECLARE_INSTRUCTION(Invoke); + DECLARE_ABSTRACT_INSTRUCTION(Invoke); protected: HInvoke(ArenaAllocator* arena, @@ -3416,7 +3460,7 @@ class HInvokeStaticOrDirect : public HInvoke { MethodReference target_method, DispatchInfo dispatch_info, InvokeType original_invoke_type, - InvokeType invoke_type, + InvokeType optimized_invoke_type, ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, @@ -3430,7 +3474,7 @@ class HInvokeStaticOrDirect : public HInvoke { dex_pc, method_index, original_invoke_type), - invoke_type_(invoke_type), + optimized_invoke_type_(optimized_invoke_type), clinit_check_requirement_(clinit_check_requirement), target_method_(target_method), dispatch_info_(dispatch_info) { } @@ -3476,7 +3520,11 @@ class HInvokeStaticOrDirect : public HInvoke { // platform-specific special input, such as PC-relative addressing base. 
uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } - InvokeType GetInvokeType() const { return invoke_type_; } + InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; } + void SetOptimizedInvokeType(InvokeType invoke_type) { + optimized_invoke_type_ = invoke_type; + } + MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } @@ -3499,6 +3547,7 @@ class HInvokeStaticOrDirect : public HInvoke { } bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } MethodReference GetTargetMethod() const { return target_method_; } + void SetTargetMethod(MethodReference method) { target_method_ = method; } int32_t GetStringInitOffset() const { DCHECK(IsStringInit()); @@ -3524,7 +3573,7 @@ class HInvokeStaticOrDirect : public HInvoke { // Is this instruction a call to a static method? bool IsStatic() const { - return GetInvokeType() == kStatic; + return GetOriginalInvokeType() == kStatic; } // Remove the HClinitCheck or the replacement HLoadClass (set as last input by @@ -3597,7 +3646,7 @@ class HInvokeStaticOrDirect : public HInvoke { void RemoveInputAt(size_t index); private: - const InvokeType invoke_type_; + InvokeType optimized_invoke_type_; ClinitCheckRequirement clinit_check_requirement_; // The target method may refer to different dex file or method index than the original // invoke. 
This happens for sharpened calls and for calls where a method was redeclared @@ -3607,6 +3656,7 @@ class HInvokeStaticOrDirect : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual : public HInvoke { @@ -4168,6 +4218,44 @@ class HXor : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HXor); }; +class HRor : public HBinaryOperation { + public: + HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance) + : HBinaryOperation(result_type, value, distance) {} + + template <typename T, typename U, typename V> + T Compute(T x, U y, V max_shift_value) const { + static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value, + "V is not the unsigned integer type corresponding to T"); + V ux = static_cast<V>(x); + if ((y & max_shift_value) == 0) { + return static_cast<T>(ux); + } else { + const V reg_bits = sizeof(T) * 8; + return static_cast<T>(ux >> (y & max_shift_value)) | + (x << (reg_bits - (y & max_shift_value))); + } + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc()); + } + + DECLARE_INSTRUCTION(Ror); + + private: + DISALLOW_COPY_AND_ASSIGN(HRor); +}; + // The value of a parameter in this method. Its location depends on // the calling convention. 
class HParameterValue : public HExpression<0> { @@ -4318,9 +4406,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. @@ -4781,6 +4873,23 @@ class HSuspendCheck : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; +// Pseudo-instruction which provides the native debugger with mapping information. +// It ensures that we can generate line number and local variables at this point. +class HNativeDebugInfo : public HTemplateInstruction<0> { + public: + explicit HNativeDebugInfo(uint32_t dex_pc) + : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {} + + bool NeedsEnvironment() const OVERRIDE { + return true; + } + + DECLARE_INSTRUCTION(NativeDebugInfo); + + private: + DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo); +}; + /** * Instruction to load a Class object. */ @@ -4791,13 +4900,15 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check) + bool needs_access_check, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), needs_access_check_(needs_access_check), + is_in_dex_cache_(is_in_dex_cache), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. 
We never inline unverified // methods so we can't possibly end up in this situation. @@ -4822,14 +4933,13 @@ class HLoadClass : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { - // Will call runtime and load the class if the class is not loaded yet. - // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } + void SetMustGenerateClinitCheck(bool generate_clinit_check) { // The entrypoint the code generator is going to call does not do // clinit of the class. @@ -4838,7 +4948,9 @@ class HLoadClass : public HExpression<1> { } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + return MustGenerateClinitCheck() || + (!is_referrers_class_ && !is_in_dex_cache_) || + needs_access_check_; } bool NeedsAccessCheck() const { @@ -4846,8 +4958,6 @@ class HLoadClass : public HExpression<1> { } bool CanThrow() const OVERRIDE { - // May call runtime and and therefore can throw. - // TODO: finer grain decision. return CanCallRuntime(); } @@ -4869,6 +4979,8 @@ class HLoadClass : public HExpression<1> { return SideEffects::CanTriggerGC(); } + bool IsInDexCache() const { return is_in_dex_cache_; } + DECLARE_INSTRUCTION(LoadClass); private: @@ -4878,7 +4990,8 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. 
bool generate_clinit_check_; - bool needs_access_check_; + const bool needs_access_check_; + const bool is_in_dex_cache_; ReferenceTypeInfo loaded_class_rti_; @@ -4887,9 +5000,13 @@ class HLoadClass : public HExpression<1> { class HLoadString : public HExpression<1> { public: - HLoadString(HCurrentMethod* current_method, uint32_t string_index, uint32_t dex_pc) + HLoadString(HCurrentMethod* current_method, + uint32_t string_index, + uint32_t dex_pc, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), - string_index_(string_index) { + string_index_(string_index), + is_in_dex_cache_(is_in_dex_cache) { SetRawInputAt(0, current_method); } @@ -4907,6 +5024,7 @@ class HLoadString : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return false; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return true; } bool CanBeNull() const OVERRIDE { return false; } + bool IsInDexCache() const { return is_in_dex_cache_; } static SideEffects SideEffectsForArchRuntimeCalls() { return SideEffects::CanTriggerGC(); @@ -4916,6 +5034,7 @@ class HLoadString : public HExpression<1> { private: const uint32_t string_index_; + const bool is_in_dex_cache_; DISALLOW_COPY_AND_ASSIGN(HLoadString); }; @@ -5556,6 +5675,9 @@ class HParallelMove : public HTemplateInstruction<0> { } // namespace art +#ifdef ART_ENABLE_CODEGEN_arm +#include "nodes_arm.h" +#endif #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif @@ -5783,6 +5905,18 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { return &lhs == &rhs; } +#define INSTRUCTION_TYPE_CHECK(type, super) \ + inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ + inline const H##type* HInstruction::As##type() const { \ + return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + } \ + inline H##type* HInstruction::As##type() { \ + return Is##type() ? 
static_cast<H##type*>(this) : nullptr; \ + } + + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h new file mode 100644 index 0000000000..6a1dbb9e70 --- /dev/null +++ b/compiler/optimizing/nodes_arm.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_ +#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_ + +namespace art { + +class HArmDexCacheArraysBase : public HExpression<0> { + public: + explicit HArmDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + void UpdateElementOffset(size_t element_offset) { + // Use the lowest offset from the requested elements so that all offsets from + // this base are non-negative because our assemblers emit negative-offset loads + // as a sequence of two or more instructions. (However, positive offsets beyond + // 4KiB also require two or more instructions, so this simple heuristic could + // be improved for cases where there is a dense cluster of elements far from + // the lowest offset. 
This is expected to be rare enough though, so we choose + // not to spend compile time on elaborate calculations.) + element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(ArmDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_ diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc new file mode 100644 index 0000000000..ac2f093847 --- /dev/null +++ b/compiler/optimizing/nodes_arm64.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_arm64.h" +#include "nodes.h" + +namespace art { + +using arm64::helpers::CanFitInShifterOperand; + +void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { + DCHECK(CanFitInShifterOperand(instruction)); + if (instruction->IsShl()) { + *op_kind = kLSL; + *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsShr()) { + *op_kind = kASR; + *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsUShr()) { + *op_kind = kLSR; + *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); + } else { + DCHECK(instruction->IsTypeConversion()); + Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); + Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = Primitive::ComponentSize(result_type); + int input_size = Primitive::ComponentSize(input_type); + int min_size = std::min(result_size, input_size); + // This follows the logic in + // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + // There is actually nothing to do. The register will be used as a W + // register, discarding the top bits. This is represented by the default + // encoding 'LSL 0'. 
+ *op_kind = kLSL; + *shift_amount = 0; + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + *op_kind = kUXTH; + } else { + switch (min_size) { + case 1: *op_kind = kSXTB; break; + case 2: *op_kind = kSXTH; break; + case 4: *op_kind = kSXTW; break; + default: + LOG(FATAL) << "Unexpected min size " << min_size; + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { + switch (op) { + case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; + case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; + case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; + case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + default: + LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..18405f2623 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -17,8 +17,83 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ +#include "nodes.h" + namespace art { +class HArm64DataProcWithShifterOp : public HExpression<2> { + public: + enum OpKind { + kLSL, // Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. 
+ kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HArm64DataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE { + HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. + static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); + // This instruction computes an intermediate address pointing in the 'middle' of an object. 
The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. @@ -42,6 +117,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. 
+ InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 34f1fe5949..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -25,6 +25,7 @@ #include "utils/assembler.h" #include "utils/arm/assembler_thumb2.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -212,6 +213,34 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. 
+ for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} + #endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 4571ebf2d4..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -413,3 +413,57 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x0002007c: nop // 0x00020080: .cfi_restore_state // 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: .cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 
0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... +// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index dec08d8978..831b626c4f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -20,6 +20,10 @@ #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 +#include "dex_cache_array_fixups_arm.h" +#endif + +#ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif @@ -391,10 +395,11 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } -// Read barrier are supported only on ARM, x86 and x86-64 at the moment. +// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment. 
// TODO: Add support for other architectures and remove this function static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { - return instruction_set == kThumb2 + return instruction_set == kArm64 + || instruction_set == kThumb2 || instruction_set == kX86 || instruction_set == kX86_64; } @@ -422,7 +427,7 @@ static void MaybeRunInliner(HGraph* graph, return; } HInliner* inliner = new (graph->GetArena()) HInliner( - graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); @@ -434,6 +439,17 @@ static void RunArchOptimizations(InstructionSet instruction_set, PassObserver* pass_observer) { ArenaAllocator* arena = graph->GetArena(); switch (instruction_set) { +#ifdef ART_ENABLE_CODEGEN_arm + case kThumb2: + case kArm: { + arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); + HOptimization* arm_optimizations[] = { + fixups + }; + RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { arm64::InstructionSimplifierArm64* simplifier = @@ -499,12 +515,13 @@ static void RunOptimizations(HGraph* graph, InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce"); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); LICM* licm = new (arena) LICM(graph, *side_effects); LoadStoreElimination* lse = new (arena) 
LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); - BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); ReferenceTypePropagation* type_propagation = new (arena) ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); @@ -514,7 +531,6 @@ static void RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier_after_bce"); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations1[] = { @@ -526,48 +542,30 @@ static void RunOptimizations(HGraph* graph, dce1, simplify2 }; - RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); - // TODO: Update passes incompatible with try/catch so we have the same - // pipeline for all methods. - if (graph->HasTryCatch()) { - HOptimization* optimizations2[] = { - boolean_simplify, - side_effects, - gvn, - dce2, - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - simplify4, - }; - - RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); - } else { - HOptimization* optimizations2[] = { - // BooleanSimplifier depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. - boolean_simplify, - fold2, // TODO: if we don't inline we can also skip fold2. 
- side_effects, - gvn, - licm, - induction, - bce, - simplify3, - lse, - dce2, - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - simplify4, - }; - - RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); - } + HOptimization* optimizations2[] = { + // BooleanSimplifier depends on the InstructionSimplifier removing + // redundant suspend checks to recognize empty blocks. + boolean_simplify, + fold2, // TODO: if we don't inline we can also skip fold2. + side_effects, + gvn, + licm, + induction, + bce, + fold3, // evaluates code generated by dynamic bce + simplify3, + lse, + dce2, + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + simplify4, + }; + RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer); AllocateRegisters(graph, codegen, pass_observer); @@ -606,8 +604,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, stack_map.resize(codegen->ComputeStackMapsSize()); codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); - MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), @@ -618,7 +614,7 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), + ArrayRef<const SrcMapElem>(), ArrayRef<const uint8_t>(), // mapping_table. 
ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(), // native_gc_map. @@ -642,7 +638,6 @@ CompiledMethod* OptimizingCompiler::EmitBaseline( ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); codegen->BuildNativeGCMap(&gc_map, *compiler_driver); - MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), @@ -653,7 +648,7 @@ CompiledMethod* OptimizingCompiler::EmitBaseline( codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), + ArrayRef<const SrcMapElem>(), AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), @@ -748,8 +743,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, ArtMethod* art_method = compiler_driver->ResolveMethod( soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); // We may not get a method, for example if its class is erroneous. - // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile. 
if (art_method != nullptr) { + graph->SetArtMethod(art_method); interpreter_metadata = art_method->GetQuickenedInfo(); } } @@ -846,6 +841,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_file, dex_cache)); if (codegen.get() != nullptr) { + MaybeRecordStat(MethodCompilationStat::kCompiled); if (run_optimizations_) { method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver); } else { @@ -856,7 +852,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); } else { - MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified); + MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError); } } @@ -932,6 +928,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, if (stack_map_data == nullptr) { return false; } + MaybeRecordStat(MethodCompilationStat::kCompiled); codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)); const void* code = code_cache->CommitCode( self, diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 6375cf1a56..6296eedfb0 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ -#include <sstream> +#include <iomanip> #include <string> #include <type_traits> @@ -27,18 +27,18 @@ namespace art { enum MethodCompilationStat { kAttemptCompilation = 0, - kCompiledBaseline, - kCompiledOptimized, + kCompiled, kInlinedInvoke, kInstructionSimplifications, kInstructionSimplificationsArch, kUnresolvedMethod, kUnresolvedField, kUnresolvedFieldNotAFastAccess, + kRemovedCheckedCast, + kRemovedDeadInstruction, + kRemovedNullCheck, 
kNotCompiledBranchOutsideMethodCode, kNotCompiledCannotBuildSSA, - kNotCompiledCantAccesType, - kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, @@ -47,13 +47,12 @@ enum MethodCompilationStat { kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, kNotCompiledUnsupportedIsa, + kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, - kNotOptimizedDisabled, - kNotOptimizedRegisterAllocator, - kNotOptimizedTryCatch, - kRemovedCheckedCast, - kRemovedDeadInstruction, - kRemovedNullCheck, + kInlinedMonomorphicCall, + kMonomorphicCall, + kPolymorphicCall, + kMegamorphicCall, kLastStat }; @@ -66,20 +65,19 @@ class OptimizingCompilerStats { } void Log() const { + if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { + // Log only in debug builds or if the compiler is verbose. + return; + } + if (compile_stats_[kAttemptCompilation] == 0) { LOG(INFO) << "Did not compile any method."; } else { - size_t unoptimized_percent = - compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation]; - size_t optimized_percent = - compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation]; - std::ostringstream oss; - oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: "; - - oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, "; - oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, "; - - LOG(INFO) << oss.str(); + float compiled_percent = + compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation]; + LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation] + << " methods: " << std::fixed << std::setprecision(2) + << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled."; for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { @@ -92,41 +90,42 @@ class OptimizingCompilerStats { private: std::string 
PrintMethodCompilationStat(MethodCompilationStat stat) const { + std::string name; switch (stat) { - case kAttemptCompilation : return "kAttemptCompilation"; - case kCompiledBaseline : return "kCompiledBaseline"; - case kCompiledOptimized : return "kCompiledOptimized"; - case kInlinedInvoke : return "kInlinedInvoke"; - case kInstructionSimplifications: return "kInstructionSimplifications"; - case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch"; - case kUnresolvedMethod : return "kUnresolvedMethod"; - case kUnresolvedField : return "kUnresolvedField"; - case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess"; - case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; - case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; - case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode"; - case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledPathological : return "kNotCompiledPathological"; - case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kRemovedCheckedCast: return "kRemovedCheckedCast"; - case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; - case kRemovedNullCheck: return 
"kRemovedNullCheck"; - - case kLastStat: break; // Invalid to print out. + case kAttemptCompilation : name = "AttemptCompilation"; break; + case kCompiled : name = "Compiled"; break; + case kInlinedInvoke : name = "InlinedInvoke"; break; + case kInstructionSimplifications: name = "InstructionSimplifications"; break; + case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break; + case kUnresolvedMethod : name = "UnresolvedMethod"; break; + case kUnresolvedField : name = "UnresolvedField"; break; + case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break; + case kRemovedCheckedCast: name = "RemovedCheckedCast"; break; + case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break; + case kRemovedNullCheck: name = "RemovedNullCheck"; break; + case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break; + case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break; + case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break; + case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break; + case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break; + case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break; + case kNotCompiledPathological : name = "NotCompiledPathological"; break; + case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break; + case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break; + case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break; + case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break; + case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break; + case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break; + case kMonomorphicCall: name = "MonomorphicCall"; break; + case kPolymorphicCall: name = "PolymorphicCall"; break; + case kMegamorphicCall: name 
= "kMegamorphicCall"; break; + + case kLastStat: + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } - LOG(FATAL) << "invalid stat " - << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); - UNREACHABLE(); + return "OptStat#" + name; } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 30bcf19c64..176c50ce21 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -169,7 +169,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; - i = 0; + i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the // move by just returning from this `PerforrmMove`. diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 46e6f3e5d0..5e8fe37669 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -609,4 +609,36 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } } +TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(0), + Location::RegisterLocation(3), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(2), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 
-> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)", + resolver.GetMessage().c_str()); + } + } +} + } // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 808a1dc6c2..a385448104 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -15,6 +15,7 @@ */ #include "pc_relative_fixups_x86.h" +#include "code_generator_x86.h" namespace art { namespace x86 { @@ -26,6 +27,15 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { public: explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + private: void VisitAdd(HAdd* add) OVERRIDE { BinaryFP(add); @@ -70,9 +80,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + if (switch_insn->GetNumEntries() <= + InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { + return; + } // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. - InitializePCRelativeBasePointer(switch_insn); + InitializePCRelativeBasePointer(); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( @@ -84,22 +98,22 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); } - void InitializePCRelativeBasePointer(HInstruction* user) { + void InitializePCRelativeBasePointer() { // Ensure we only initialize the pointer once. 
if (base_ != nullptr) { return; } - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); - HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base_, insert_pos); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); DCHECK(base_ != nullptr); } void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { - InitializePCRelativeBasePointer(insn); + InitializePCRelativeBasePointer(); HX86LoadFromConstantTable* load_constant = new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); insn->GetBlock()->InsertInstructionBefore(load_constant, insn); @@ -111,7 +125,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { - InitializePCRelativeBasePointer(invoke); + InitializePCRelativeBasePointer(); // Add the extra parameter base_. 
DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); invoke_static_or_direct->AddSpecialInput(base_); @@ -133,6 +147,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void PcRelativeFixups::Run() { PCRelativeHandlerVisitor visitor(graph_); visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); } } // namespace x86 diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 0d05c49fc5..fea903d9cf 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -127,6 +127,87 @@ void ReferenceTypePropagation::ValidateTypes() { } } +static void CheckHasNoTypedInputs(HInstruction* root_instr) { + ArenaAllocatorAdapter<void> adapter = + root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation); + + ArenaVector<HPhi*> visited_phis(adapter); + ArenaVector<HInstruction*> worklist(adapter); + worklist.push_back(root_instr); + + while (!worklist.empty()) { + HInstruction* instr = worklist.back(); + worklist.pop_back(); + + if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) { + // Expect that both `root_instr` and its inputs have invalid RTI. 
+ ScopedObjectAccess soa(Thread::Current()); + DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI."; + + // Insert all unvisited inputs to the worklist. + for (HInputIterator it(instr); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi()) { + if (ContainsElement(visited_phis, input->AsPhi())) { + continue; + } else { + visited_phis.push_back(input->AsPhi()); + } + } + worklist.push_back(input); + } + } else if (instr->IsNullConstant()) { + // The only input of `root_instr` allowed to have valid RTI because it is ignored. + } else { + LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI " + << instr->GetReferenceTypeInfo(); + UNREACHABLE(); + } + } +} + +template<typename Functor> +static void ForEachUntypedInstruction(HGraph* graph, Functor fn) { + ScopedObjectAccess soa(Thread::Current()); + for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) { + for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + } +} + +void ReferenceTypePropagation::SetUntypedInstructionsToObject() { + // In some cases, the fix-point iteration will leave kPrimNot instructions with + // invalid RTI because bytecode does not provide enough typing information. + // Set the RTI of such instructions to Object. 
+ // Example: + // MyClass a = null, b = null; + // while (a == null) { + // if (cond) { a = b; } else { b = a; } + // } + + if (kIsDebugBuild) { + // Test that if we are going to set RTI from invalid to Object, that + // instruction did not have any typed instructions in its def-use chain + // and therefore its type could not be inferred. + ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); }); + } + + ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false); + ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) { + instr->SetReferenceTypeInfo(obj_rti); + }); +} + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -136,6 +217,7 @@ void ReferenceTypePropagation::Run() { } ProcessWorklist(); + SetUntypedInstructionsToObject(); ValidateTypes(); } @@ -387,7 +469,7 @@ void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, // but then we would need to pass it to RTPVisitor just for this debug check. Since // the method is from the String class, the null loader is good enough. 
Handle<mirror::ClassLoader> loader; - ArtMethod* method = cl->ResolveMethod( + ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>( invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect); DCHECK(method != nullptr); mirror::Class* declaring_class = method->GetDeclaringClass(); @@ -534,8 +616,9 @@ void RTPVisitor::VisitLoadException(HLoadException* instr) { void RTPVisitor::VisitNullCheck(HNullCheck* instr) { ScopedObjectAccess soa(Thread::Current()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - DCHECK(parent_rti.IsValid()); - instr->SetReferenceTypeInfo(parent_rti); + if (parent_rti.IsValid()) { + instr->SetReferenceTypeInfo(parent_rti); + } } void RTPVisitor::VisitFakeString(HFakeString* instr) { @@ -588,11 +671,16 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { } if (phi->GetBlock()->IsLoopHeader()) { + ScopedObjectAccess soa(Thread::Current()); // Set the initial type for the phi. Use the non back edge input for reaching // a fixed point faster. + HInstruction* first_input = phi->InputAt(0); + ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo(); + if (first_input_rti.IsValid() && !first_input->IsNullConstant()) { + phi->SetCanBeNull(first_input->CanBeNull()); + phi->SetReferenceTypeInfo(first_input_rti); + } AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); } else { // Eagerly compute the type of the phi, for quicker convergence. 
Note // that we don't need to add users to the worklist because we are @@ -653,7 +741,9 @@ static void UpdateArrayGet(HArrayGet* instr, DCHECK_EQ(Primitive::kPrimNot, instr->GetType()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - DCHECK(parent_rti.IsValid()); + if (!parent_rti.IsValid()) { + return; + } Handle<mirror::Class> handle = parent_rti.GetTypeHandle(); if (handle->IsObjectArrayClass()) { @@ -665,8 +755,6 @@ static void UpdateArrayGet(HArrayGet* instr, instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false)); } - - return; } bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { @@ -683,7 +771,7 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { instr->SetReferenceTypeInfo(parent_rti); } } else if (instr->IsArrayGet()) { - // TODO: consider if it's worth "looking back" and bounding the input object + // TODO: consider if it's worth "looking back" and binding the input object // to an array type. UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_); } else { @@ -711,6 +799,7 @@ void RTPVisitor::VisitArrayGet(HArrayGet* instr) { if (instr->GetType() != Primitive::kPrimNot) { return; } + ScopedObjectAccess soa(Thread::Current()); UpdateArrayGet(instr, handles_, object_class_handle_); if (!instr->GetReferenceTypeInfo().IsValid()) { @@ -770,7 +859,10 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { } } } - instr->SetReferenceTypeInfo(new_rti); + + if (new_rti.IsValid()) { + instr->SetReferenceTypeInfo(new_rti); + } } // Re-computes and updates the nullability of the instruction. 
Returns whether or diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 5c05592726..21789e1331 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -57,6 +57,7 @@ class ReferenceTypePropagation : public HOptimization { SHARED_REQUIRES(Locks::mutator_lock_); void ValidateTypes(); + void SetUntypedInstructionsToObject(); StackHandleScopeCollection* handles_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 080f970756..8706854a6a 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -472,7 +472,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HInstruction** input2) { HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( @@ -624,7 +624,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { HGraph* graph = CreateGraph(allocator); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index a128079cdb..5e1d1d9954 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -49,7 +49,8 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { } // TODO: Avoid CompilerDriver. 
- InvokeType invoke_type = invoke->GetOriginalInvokeType(); + InvokeType original_invoke_type = invoke->GetOriginalInvokeType(); + InvokeType optimized_invoke_type = original_invoke_type; MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex()); int vtable_idx; uintptr_t direct_code, direct_method; @@ -58,15 +59,18 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { invoke->GetDexPc(), false /* update_stats: already updated in builder */, true /* enable_devirtualization */, - &invoke_type, + &optimized_invoke_type, &target_method, &vtable_idx, &direct_code, &direct_method); - DCHECK(success); - DCHECK_EQ(invoke_type, invoke->GetInvokeType()); - DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file); - DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index); + if (!success) { + // TODO: try using kDexCachePcRelative. It's always a valid method load + // kind as long as it's supported by the codegen + return; + } + invoke->SetOptimizedInvokeType(optimized_invoke_type); + invoke->SetTargetMethod(target_method); HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5190eb3b26..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. 
+static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. - if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. + DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. 
Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. 
+ DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
+ (*current_locals_)[reg_number + 1] = nullptr; + } + store->GetBlock()->RemoveInstruction(store); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 79f1a28ac8..dcce5e4c2c 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + void SetLoopHeaderPhiInputs(); void FixNullConstantType(); void EquivalentPhisCleanup(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 72f9ddd506..a3219dcc38 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,6 +16,8 @@ #include "ssa_phi_elimination.h" +#include "base/arena_containers.h" + namespace art { void SsaDeadPhiElimination::Run() { @@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Phis are constructed live and should not be revived if previously marked + // dead. This algorithm temporarily breaks that invariant but we DCHECK that + // only phis which were initially live are revived. + ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter()); + // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); - // Set dead ahead of running through uses. The phi may have no use. 
- phi->SetDead(); + if (phi->IsDead()) { + continue; + } + + bool has_non_phi_use = false; for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* current = use_it.Current(); - HInstruction* user = current->GetUser(); - if (!user->IsPhi()) { - worklist_.push_back(phi); - phi->SetLive(); + if (!use_it.Current()->GetUser()->IsPhi()) { + has_non_phi_use = true; break; } } + + if (has_non_phi_use) { + worklist_.push_back(phi); + } else { + phi->SetDead(); + if (kIsDebugBuild) { + initially_live.insert(phi); + } + } } } @@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist_.back(); worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.push_back(input->AsPhi()); - input->AsPhi()->SetLive(); + HPhi* input = it.Current()->AsPhi(); + if (input != nullptr && input->IsDead()) { + // Input is a dead phi. Revive it and add to the worklist. We make sure + // that the phi was not dead initially (see definition of `initially_live`). + DCHECK(ContainsElement(initially_live, input)); + input->SetLive(); + worklist_.push_back(input); } } } @@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() { } if (phi->InputCount() == 0) { - DCHECK(phi->IsCatchPhi()); DCHECK(phi->IsDead()); continue; } |